Name the data you export from FileMaker Pro by their exact table names and save them as CSVs, e.g. Larvae_Morphology.csv
#setwd("~/Repos/LarvaeTransGen2018")
setwd("~/R/GitHub/LarvaeTransGen2018/data") #Elise's working directory
#upload all of the data tables for the larvae experiment
Larvae_Morphology <- read.csv("../data/Larvae_Morphology.csv") #contains data from CellProfiler from the larvae outlines
Barcode_Jar <- read.csv("../data/Barcode_Jar.csv") #contains data on CrossID, seatable, and whether or not larvae were present
Block_ID <- read.csv("../data/Block_ID.csv") #contains data on the block
Cross<- read.csv("../data/Cross.csv") #contains data on the female and male IDs for the cross and whether the cross was for QG or Meth
Fert_QG<- read.csv("../data/Fert_QG.csv") #contains data on fertilization counts, the JarIDs for the crosses
Header_WC<- read.csv("../data/Header_WC.csv") #contains data on the header tanks used to fill the jars
Larvae_Calcification <- read.csv("../data/Larvae_Calcification.csv") #contains data on filter weights and counts
Larvae_Counts <- read.csv("../data/Larvae_Counts.csv") #contains data on sedgewick rafter larvae counts
Larvae_WC <- read.csv("../data/Larvae_WC.csv") #contains data on larvae jar water chemistry
Parent_ID <- read.csv("../data/Parent_ID.csv") #contains data on the adults at the time of shucking. Includes Parent ID assignments
Larvae_Cilia<- read.csv("../data/Larvae_Cilia.csv") #contains data on cilia extrusion of the larvae photographed for morphology
Egg_Morphology<- read.csv("../data/Egg_Morphology.csv") #contains data on egg sizes from CellProfiler
Adult_Sample<- read.csv("../data/Adult_Sample.csv") #contains data on adult oysters at the time of collection
WC_Standard<- read.csv("../data/WC_Standard.csv") #contains data on the standards used for water chemistry
Tank_WC<- read.csv("../data/Tank_WC.csv") #contains data on the adult tank water chemistry
Make required fields factors:
Adult_Sample$AccTankID<- as.factor(Adult_Sample$AccTankID)
Adult_Sample$TrtTankID<- as.factor(Adult_Sample$TrtTankID)
Barcode_Jar$JarSeatable<- as.factor(Barcode_Jar$JarSeatable)
Tank_WC$ParentTrt<- as.factor(Tank_WC$ParentTrt)
Tank_WC$Tank<- as.factor(Tank_WC$Tank)
Tank_WC$Folder<- as.factor(Tank_WC$Folder)
WC_Standard$CRM<- as.factor(WC_Standard$CRM)
Larvae_WC$DateFilter<- as.factor(Larvae_WC$DateFilter)
Calculate average larval jar water chemistry based on the subset of bottles that were run on the VINDTA. May need to look to see if we want to use the mean or the median
#start by joining Barcode_Jar and Larvae_WC datasets
JarChem<- merge(x=Barcode_Jar, y= Larvae_WC, by="JarID", all.x=TRUE)
#combine Date filter and JarTrt columns to get a unique combo value for each
JarChem<-unite(JarChem,BlockTrt, c("DateFilter","JarTrt"), sep='', remove=F)
#get only jars with larvae
JarChem<- subset(JarChem, Larvae =="TRUE")
JarChem<- subset(JarChem, DateFilter !="20180810")
boxplot(JarpHSW~BlockTrt, data=JarChem, ylim=c(6,8))
#get means for the parameters
JarMeans<- ddply(JarChem, .(BlockTrt), numcolwise(mean, na.rm=T))
#remove the unnecessary columns from JarMeans
JarMeans<- subset(JarMeans, select= c(BlockTrt,JarAlkCalc, JarpCO2, JarHCO3, JarCO3, JarCO2, JarOmegaCalcite, JarOmegaArg))
#add Est for "estimated" to the column names
colnames(JarMeans)<- paste("Est", colnames(JarMeans), sep="")
#add columns to JarChem for mean carb values
JarChem<- merge(x=JarChem, y=JarMeans, by.x="BlockTrt", by.y="EstBlockTrt", all.x=TRUE)
#use ifelse statement to fill in the na values in the original carb fields
JarChem$JarpCO2<- ifelse((is.na(JarChem$JarpCO2)), paste(JarChem$EstJarpCO2), JarChem$JarpCO2)
JarChem$JarAlkCalc<- ifelse((is.na(JarChem$JarAlkCalc)), paste(JarChem$EstJarAlkCalc), JarChem$JarAlkCalc)
JarChem$JarHCO3<- ifelse((is.na(JarChem$JarHCO3)), paste(JarChem$EstJarHCO3), JarChem$JarHCO3)
JarChem$JarCO3<- ifelse((is.na(JarChem$JarCO3)), paste(JarChem$EstJarCO3), JarChem$JarCO3)
JarChem$JarCO2<- ifelse((is.na(JarChem$JarCO2)), paste(JarChem$EstJarCO2), JarChem$JarCO2)
JarChem$JarOmegaCalcite<- ifelse((is.na(JarChem$JarOmegaCalcite)), paste(JarChem$EstJarOmegaCalcite), JarChem$JarOmegaCalcite)
JarChem$JarOmegaArg<- ifelse((is.na(JarChem$JarOmegaArg)), paste(JarChem$EstJarOmegaArg), JarChem$JarOmegaArg)
#get the carb parameters to numeric
JarChem$JarpCO2<- as.numeric(JarChem$JarpCO2)
JarChem$JarAlkCalc<- as.numeric(JarChem$JarAlkCalc)
JarChem$JarHCO3<- as.numeric(JarChem$JarHCO3)
JarChem$JarCO3<- as.numeric(JarChem$JarCO3)
JarChem$JarCO2<- as.numeric(JarChem$JarCO2)
JarChem$JarOmegaCalcite<- as.numeric(JarChem$JarOmegaCalcite)
JarChem$JarOmegaArg<- as.numeric(JarChem$JarOmegaArg)
#good to go!
Look at the water chemistry data for adult tanks
#subset the tank data to only get blocks 1 and 2
Tank_WCB1B2<- subset(Tank_WC, Block != "B3")
#test to see if tanks are significantly different.
#start by plotting the pCO2 for each tank
boxplot(WCa_pHDIC~Tank, data=Tank_WCB1B2, na.omit=TRUE)
Tank_WCB1B2sub<- subset(Tank_WCB1B2, WCa_pHDIC !="NA")
#check for significant differences in water chemistry for blocks 1 & 2
#pCO2 differences
Ca1<- lm(WCa_pHDIC~ParentTrt*Block*Tank, data=Tank_WCB1B2sub)
#check assumptions
par(mfcol=c(2,2))
plot(Ca1) #there is more variance at the high pCO2 treatment.
par(mfcol=c(1,1))
acf(Ca1)
## Error in complete.cases(object): not all arguments have the same length
Ca2<- lm(WCa_pHDIC~ParentTrt*Block+Tank, data=Tank_WCB1B2sub)
anova(Ca1, Ca2)#choose Ca2
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt * Block * Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Block + Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 0
Ca3<- lm(WCa_pHDIC~ParentTrt+Block*Tank, data=Tank_WCB1B2sub)
anova(Ca2, Ca3) #unclear which is better
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt * Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Block * Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 0
Ca4<- lm(WCa_pHDIC~ParentTrt*Tank+Block, data=Tank_WCB1B2sub)
anova(Ca2, Ca4) #unclear which is better
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt * Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Tank + Block
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 1.1102e-15
anova(Ca3, Ca4) #unclear which is better
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block * Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Tank + Block
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 1.1102e-15
Ca5<- lm(WCa_pHDIC~ParentTrt+Block+Tank, data=Tank_WCB1B2sub)
anova(Ca2, Ca5) #choose Ca5
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt * Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Block + Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 0
anova(Ca3, Ca5) #choose Ca5
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block * Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Block + Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 0
anova(Ca4, Ca5) #choose Ca5
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt * Tank + Block
## Model 2: WCa_pHDIC ~ ParentTrt + Block + Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 -1.1102e-15
Ca6<- lm(WCa_pHDIC~ParentTrt*Block, data=Tank_WCB1B2sub)
anova(Ca5, Ca6) #choose Ca6
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Block
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 67 1.7602 -12 -0.21535 0.6389 0.7998
Ca7<- lm(WCa_pHDIC~ParentTrt+Block, data=Tank_WCB1B2sub)
anova(Ca6, Ca7) #Choose Ca7
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt * Block
## Model 2: WCa_pHDIC ~ ParentTrt + Block
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 67 1.7602
## 2 68 1.8280 -1 -0.067855 2.5829 0.1127
Ca8<- lm(WCa_pHDIC~Block*Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca8) #choose Ca7
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ Block * Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 68 1.8280
## 2 55 1.5448 13 0.28321 0.7756 0.6815
Ca9<- lm(WCa_pHDIC~Block+Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca9) #unclear which to choose
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ Block + Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 68 1.8280
## 2 55 1.5448 13 0.28321 0.7756 0.6815
Ca10<- lm(WCa_pHDIC~ParentTrt*Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca10) #choose Ca7
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ ParentTrt * Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 68 1.8280
## 2 55 1.5448 13 0.28321 0.7756 0.6815
anova(Ca9, Ca10)#choose Ca9
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt * Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 2.2205e-15
Ca11<- lm(WCa_pHDIC~ParentTrt+Tank, data=Tank_WCB1B2sub)
anova(Ca7, Ca11) #unclear which to choose
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ ParentTrt + Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 68 1.8280
## 2 55 1.5448 13 0.28321 0.7756 0.6815
anova(Ca9, Ca11) #unclear which to choose
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt + Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 55 1.5448 0 2.2205e-15
Ca12<- lm(WCa_pHDIC~ParentTrt, data=Tank_WCB1B2sub)
anova(Ca7, Ca12) #choose Ca12
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Block
## Model 2: WCa_pHDIC ~ ParentTrt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 68 1.8280
## 2 69 2.1361 -1 -0.30807 11.46 0.001185 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Ca9, Ca12) #choose Ca12
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ Block + Tank
## Model 2: WCa_pHDIC ~ ParentTrt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 69 2.1361 -14 -0.59127 1.5037 0.1408
anova(Ca11, Ca12)#choose Ca12
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt + Tank
## Model 2: WCa_pHDIC ~ ParentTrt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 55 1.5448
## 2 69 2.1361 -14 -0.59127 1.5037 0.1408
Ca13<- lm(WCa_pHDIC~Block, data=Tank_WCB1B2sub)
anova(Ca12, Ca13)
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt
## Model 2: WCa_pHDIC ~ Block
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 69 2.1361
## 2 69 17.0800 0 -14.944
Ca14<- lm(WCa_pHDIC~Tank, data=Tank_WCB1B2sub)
anova(Ca12, Ca14)
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ ParentTrt
## Model 2: WCa_pHDIC ~ Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 69 2.1361
## 2 55 1.5448 14 0.59127 1.5037 0.1408
anova(Ca13, Ca14)# select Ca14
## Analysis of Variance Table
##
## Model 1: WCa_pHDIC ~ Block
## Model 2: WCa_pHDIC ~ Tank
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 69 17.0800
## 2 55 1.5448 14 15.535 39.507 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check AIC to see what the final model should be
AIC(Ca12) #lowest AIC
## [1] -41.27382
AIC(Ca13)
## [1] 106.3303
AIC(Ca14)
## [1] -36.28299
par(mfcol=c(2,2))
plot(Ca12) #does this look okay to you, Katie?
par(mfcol=c(1,1))
acf(Ca12)
## Error in complete.cases(object): not all arguments have the same length
summary(Ca12)
##
## Call:
## lm(formula = WCa_pHDIC ~ ParentTrt, data = Tank_WCB1B2sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49577 -0.07996 -0.02372 0.09681 0.58061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.30802 0.02932 44.60 <2e-16 ***
## ParentTrt2800 -0.92881 0.04177 -22.24 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1759 on 69 degrees of freedom
## Multiple R-squared: 0.8776, Adjusted R-squared: 0.8758
## F-statistic: 494.5 on 1 and 69 DF, p-value: < 2.2e-16
#There is a significant effect of ParentTrt, but not tank or block according to best model (lowest AIC). So I would feel confident just using this as a factor.
#create dataframe that has the mean values for each tank
TankMeans<- ddply(Tank_WCB1B2, .(Tank, Block, ParentTrt), numcolwise(mean, na.rm=T))
Join the data into dataframes for analysis.
#First dataframe will be for examining egg morphology for all eggs traced; multiple values for each adult
ParentInfo<- merge(x=Adult_Sample, y=Parent_ID, by="AdultID", all.x=TRUE)
Eggs<- merge(x=Egg_Morphology, y=ParentInfo, by.x="FemaleID", by.y="ParentBlockID", all.x=TRUE)
#add the tank data
Eggs<- merge(x=Eggs, y=TankMeans, by.x="TrtTankID", by.y= "Tank", all.x=TRUE)
#subset the data to exclude Block 3
EggsB1B2<- subset(Eggs, Block !="B3") # in the end we may decide to also exclude Block 2.
EggsYoutliers<- subset(Eggs, Block!="B3") #dataset with the outliers remaining
Visualize the data for eggs to start. Again, using only blocks 1 & 2
#There were some concerns at first that the eggs that were measured were too large. Elise looked into this in April 2020. The literature lists mature eggs for C. virginica as ~60 um in diameter. Check this against our mean eggs
meanegg<- aggregate(EggDiamum~Block, data= EggsB1B2, FUN=mean)
meanegg # these means are in agreement with the average size of a mature egg.
## Block EggDiamum
## 1 B1 62.40381
## 2 B2 63.37102
#now look at the data to find outliers.
#look at the egg measurements for each female
boxplot(EggDiamum~FemaleID, data=EggsB1B2) #CF06_B2 seems to have eggs that are smaller than everyone else
boxplot(EggDiamum~Block*ParentTrt, data=EggsB1B2)
#check for outliers using code from https://www.r-bloggers.com/identify-describe-plot-and-remove-the-outliers-from-the-dataset/. The function uses Tukey's method to ID ouliers ranged above and below the 1.5*IQR.
outlierKD <- function(dt, var) {
var_name <- eval(substitute(var),eval(dt))
na1 <- sum(is.na(var_name))
m1 <- mean(var_name, na.rm = T)
par(mfrow=c(2, 2), oma=c(0,0,3,0))
boxplot(var_name, main="With outliers")
hist(var_name, main="With outliers", xlab=NA, ylab=NA)
outlier <- boxplot.stats(var_name)$out
mo <- mean(outlier)
var_name <- ifelse(var_name %in% outlier, NA, var_name)
boxplot(var_name, main="Without outliers")
hist(var_name, main="Without outliers", xlab=NA, ylab=NA)
title("Outlier Check", outer=TRUE)
na2 <- sum(is.na(var_name))
cat("Outliers identified:", na2 - na1, "n")
cat("Propotion (%) of outliers:", round((na2 - na1) / sum(!is.na(var_name))*100, 1), "n")
cat("Mean of the outliers:", round(mo, 2), "n")
m2 <- mean(var_name, na.rm = T)
cat("Mean without removing outliers:", round(m1, 2), "n")
cat("Mean if we remove outliers:", round(m2, 2), "n")
response <- readline(prompt="Do you want to remove outliers and to replace with NA? [yes/no]: ")
if(response == "y" | response == "yes"){
dt[as.character(substitute(var))] <- invisible(var_name)
assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv)
cat("Outliers successfully removed", "n")
return(invisible(dt))
} else{
dt[as.character(substitute(var))] <- invisible(var_name)
assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv)
cat("Outliers successfully removed", "n")
return(invisible(dt))
}
}
outlierKD(EggsB1B2, EggDiamum)
## Outliers identified: 90 nPropotion (%) of outliers: 10.8 nMean of the outliers: 57.85 nMean without removing outliers: 62.88 nMean if we remove outliers: 63.43 nDo you want to remove outliers and to replace with NA? [yes/no]:
## Outliers successfully removed n
#I opted to remove outliers for now.
#Outliers identified: 90 nPropotion (%) of outliers: 10.8 nMean of the outliers: 57.85 nMean without removing outliers: 62.88 nMean if we remove outliers: 63.43 n
#count how many eggs are left per female after removing the outliers.
outrem<- aggregate(EggDiamum~FemaleID, data=EggsB1B2, FUN=length)
#there are three females that have 10 or fewer eggs : CF06_B2, CF08_B2, and EF07_B1
#remove females with fewer than 10 eggs
EggsB1B2sub<- subset(EggsB1B2, FemaleID != "CF06_B2" & FemaleID != "CF08_B2")
#moving forward for Egg data, I will be using EggsB1B2sub
#Plot again
par(mfcol=c(1,1))
boxplot(EggDiamum~FemaleID, data=EggsB1B2sub)
boxplot(EggDiamum~Block*ParentTrt, data=EggsB1B2sub)
#note that the eggs were filtered through a 70 um filter, but it appears many of them that were that large or larger made it through. Katie, should we remove these as well? I don't really think we should.
#plot histograms of the egg data with outliers removed
ggplot(EggsB1B2sub, aes(x=EggDiamum, color=ParentTrt))+
geom_histogram(alpha=0.5, position="identity") +
facet_grid(~Block)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 59 rows containing non-finite values (stat_bin).
eggmeans<- aggregate(EggDiamum~ParentTrt, data=EggsB1B2sub, FUN=mean) #400 = 63.76692; 2800= 63.12195
eggmedians<- aggregate(EggDiamum~ParentTrt, data=EggsB1B2sub, FUN=median)#400 = 63.10395; 2800= 62.68700
#I will use the mean egg size.
Make datasets without the egg size outliers
#start by removing the outliers from Egg_Morphology
Egg_MorphNout<- Egg_Morphology
outlierKD(Egg_MorphNout, EggDiamum)
## Outliers identified: 131 nPropotion (%) of outliers: 11.1 nMean of the outliers: 53.51 nMean without removing outliers: 61.97 nMean if we remove outliers: 62.9 nDo you want to remove outliers and to replace with NA? [yes/no]:
## Outliers successfully removed n
par(mfcol=c(1,1))
meanegg<- ddply(Egg_MorphNout, .(FemaleID), numcolwise(mean, na.rm=T)) #get mean egg size
FemAdults<- merge(x=ParentInfo, y= meanegg, by.x="ParentBlockID", by.y= "FemaleID", all.y=TRUE)
#Next dataframe will be for one line per traced larva, includes all the data for that jar, including average egg size for that jar
#EggsB1B2sub dataframe has all of the info that we need for the
Larvae<-merge(x=Larvae_Morphology, y=JarChem, by="JarID", all.x=TRUE)
#use gather function in tidyr to make the Fert_QG Jar ID columns in long form vs. wide form
Fert_QG_long <- Fert_QG %>% tidyr::gather(Key, JarID, JarID1:JarID6)
## Warning: attributes are not identical across measure variables;
## they will be dropped
#use the gathered data frame to merge with Larvae
Larvae<-merge(x=Larvae, y=Fert_QG_long, by.x= c("JarID", "CrossID"), by.y=c("JarID", "CrossID"), all.x=TRUE) #did the join by two columns so CrossID column wasn't duplicated
Larvae<- merge(x=Larvae, y=Cross, by="CrossID", all.x=TRUE)
Larvae<- merge(x=Larvae, y=FemAdults, by.x= "FemaleID", by.y="ParentBlockID", all.x=TRUE)
Larvae<- merge(x=Larvae, y= Larvae_Counts, by="JarID", all.x=TRUE)
Larvae<- merge(x=Larvae, y=TankMeans, by.x= "TrtTankID", by.y="Tank", all.x=TRUE)
#Remove jars without larvae
Larvae<- subset(Larvae, Larvae=="TRUE")
#get the growth per day for the larvae. Katie, do we have a column with the larvae age?
Larvae$GrowthPerDay<- (Larvae$LarvaeDiamum-Larvae$EggDiamum)/3
#Final dataframe will be one line per Jar, includes all data for the jar, including the average larvae size per jar and cilia
LarvByJar<-merge(x=Larvae_Calcification, y=Larvae_Cilia, by="JarID", all.x=TRUE, all.y=TRUE)
LarvByJar<- merge(x=LarvByJar, y=JarChem, by="JarID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=Fert_QG_long, by.x=c("JarID", "CrossID"), by.y=c("JarID","CrossID"), all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=Cross, by="CrossID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=FemAdults, by.x="FemaleID", by.y="ParentBlockID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=Larvae_Counts, by="JarID", all.x=TRUE)
LarvByJar<- merge(x=LarvByJar, y=TankMeans, by.x= "TrtTankID", by.y="Tank", all.x=TRUE)
#get the means for larvae morphology and add to LarvByJar
MeanLarvMorph<- ddply(Larvae_Morphology, .(JarID), numcolwise(mean, na.rm=T))
SELarvMorph<- ddply(Larvae_Morphology, .(JarID), numcolwise(se, na.rm=T)) #get standard error of larvae size
MeanLarvMorph<- full_join(MeanLarvMorph,SELarvMorph, by=c("JarID", "JarID"),suffix=c("","SE")) #join the mean and se larvae data
LarvByJar<- merge(x=LarvByJar, y=MeanLarvMorph, by="JarID", all.x=TRUE)
#Now let's add the survival data. See notes on "Larvae Survival" for more info on how we decided to count total larvae in the jar
#Make columns for the two larvae counts for 1_5 and 2
LarvByJar$v1_5SurvCount<- LarvByJar$SWRTotal+LarvByJar$F2LarvaeCount
LarvByJar$v2SurvCount<- LarvByJar$F1LarvaeCount+LarvByJar$F2LarvaeCount
#use ifelse statement to define the LarvaeSurvived column
LarvByJar$LarvaeSurvived<- ifelse(LarvByJar$ProtocolVersion =="1", paste(LarvByJar$TotalLarvae), ifelse(LarvByJar$ProtocolVersion =="1_5", paste(LarvByJar$v1_5SurvCount), paste(LarvByJar$v2SurvCount)))
LarvByJar$LarvaeSurvived<- as.numeric(LarvByJar$LarvaeSurvived)
## Warning: NAs introduced by coercion
#elise checked the above code to make sure it resulted in the correct calculation and it does.
#now get the estimate of how many larvae were originally added to the jar.
LarvByJar$LarvaeStocked<- LarvByJar$LarvaePermL*LarvByJar$mLJarActual
LarvByJar$PropSurvived<- LarvByJar$LarvaeSurvived/LarvByJar$LarvaeStocked
#remove jars that did not have larvae:
LarvByJar<- subset(LarvByJar, Larvae=="TRUE")
#now remove the data we do not want to analyze.
#For the Larvae datasets, that means removing Block 3, Block 2 control jars; Larvae from Females CF06_B2 and CF08_B2
LarvaeDat<- subset(Larvae, Block == "B1")
LarvaeB2e<- subset(Larvae, Block=="B2")
LarvaeB2e<- subset(LarvaeB2e, JarTrt == "Exposed")
LarvaeDat<- rbind(LarvaeDat, LarvaeB2e)
LarvaeDat<- subset(LarvaeDat, FemaleID != "CF06_B2" & FemaleID != "CF08_B2")
LarvaeDatB1<- subset(LarvaeDat, Block=="B1")
LarvByJarDat<- subset(LarvByJar, Block == "B1")
LarvByJarB2e<- subset(LarvByJar, Block=="B2")
LarvByJarB2e<- subset(LarvByJarB2e, JarTrt == "Exposed")
LarvByJarDat<- rbind(LarvByJarDat, LarvByJarB2e)
LarvByJarDat<- subset(LarvByJarDat, FemaleID != "CF06_B2" & FemaleID != "CF08_B2")
Test for significant effect of parent treatment on egg size. Start with measured water chem
#to test the simple models, I need to remove data that don't have an associated water chem from pH and DIC, hopefully these bottles can be found and added to the dataset, but for now I will need to omit those
CalEggs<- subset(EggsB1B2sub, WCa_pHDIC != "NA")
#start with models that have the response variable EggDiamum
eggdiam1<- lmer(EggDiamum~WCa_pHDIC*AdultLength + (1|FemaleID) + (1|Block) +(1|AccTankID), data=CalEggs)
#Check assumptions
par(mfcol=c(1,1))
qqnorm(resid(eggdiam1))
qqline(resid(eggdiam1))
plot(eggdiam1)
acf(eggdiam1)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(eggdiam1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula:
## EggDiamum ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 | Block) +
## (1 | AccTankID)
## Data: CalEggs
##
## REML criterion at convergence: 4850.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6026 -0.6030 -0.0577 0.5649 3.3588
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 6.153e+00 2.480e+00
## AccTankID (Intercept) 4.709e-13 6.862e-07
## Block (Intercept) 4.008e-02 2.002e-01
## Residual 1.936e+01 4.400e+00
## Number of obs: 826, groups: FemaleID, 30; AccTankID, 6; Block, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 60.7388 4.5229 25.3650 13.429 4.93e-13 ***
## WCa_pHDIC 3.5893 4.3469 24.8490 0.826 0.417
## AdultLength 0.6196 1.4016 24.9670 0.442 0.662
## WCa_pHDIC:AdultLength -0.8390 1.3123 24.9140 -0.639 0.528
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC AdltLn
## WCa_pHDIC -0.884
## AdultLength -0.976 0.852
## WC_pHDIC:AL 0.875 -0.970 -0.890
#try simplifying the model; step function gives me errors, so I ended up doing this manually
eggdiam2<- lmer(EggDiamum~WCa_pHDIC+ (1|FemaleID) + (1|Block) , data=CalEggs)
anova(eggdiam1, eggdiam2)#Choose eggdiam2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## object: EggDiamum ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 | Block) +
## object: (1 | AccTankID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 5 4866.0 4889.5 -2428.0 4856.0
## object 8 4871.4 4909.1 -2427.7 4855.4 0.548 3 0.9082
eggdiam3<- lm(EggDiamum~WCa_pHDIC, data=CalEggs)
anova(eggdiam2, eggdiam3)#choose eggdiam2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ WCa_pHDIC
## object: EggDiamum ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 3 4996.6 5010.8 -2495.3 4990.6
## object 5 4866.0 4889.5 -2428.0 4856.0 134.69 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eggdiam4<- lmer(EggDiamum~WCa_pHDIC + (1|FemaleID), data=CalEggs)
anova(eggdiam2, eggdiam4)#choose eggdiam4
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ WCa_pHDIC + (1 | FemaleID)
## object: EggDiamum ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 4 4864 4882.8 -2428 4856
## object 5 4866 4889.5 -2428 4856 0 1 1
eggdiam5<- lmer(EggDiamum ~ 1 + (1|FemaleID), data=CalEggs)
anova(eggdiam4, eggdiam5) #choose eggdiam5
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggDiamum ~ 1 + (1 | FemaleID)
## object: EggDiamum ~ WCa_pHDIC + (1 | FemaleID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 3 4862.7 4876.9 -2428.3 4856.7
## object 4 4864.0 4882.8 -2428.0 4856.0 0.7443 1 0.3883
summary(eggdiam5)
## summary from lme4 is returned
## some computational error has occurred in lmerTest
## Linear mixed model fit by REML ['lmerMod']
## Formula: EggDiamum ~ 1 + (1 | FemaleID)
## Data: CalEggs
##
## REML criterion at convergence: 4856.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5945 -0.6051 -0.0562 0.5569 3.3466
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 5.722 2.392
## Residual 19.360 4.400
## Number of obs: 826, groups: FemaleID, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 63.4178 0.4637 136.8
#look at assumptions of eggdiam 5
par(mfcol=c(1,1))
plot(eggdiam5)
qqnorm(resid(eggdiam5))
qqline(resid(eggdiam5))
acf(eggdiam5)
## Error in complete.cases(object): invalid 'type' (S4) of argument
eggdiam6<- lm(EggDiamum~FemaleID, data=CalEggs)
anova(eggdiam3, eggdiam6)
## Analysis of Variance Table
##
## Model 1: EggDiamum ~ WCa_pHDIC
## Model 2: EggDiamum ~ FemaleID
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 824 20347
## 2 796 15410 28 4936.2 9.1061 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggdiam6)
##
## Call:
## lm(formula = EggDiamum ~ FemaleID, data = CalEggs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.2388 -2.7244 -0.2563 2.4543 15.1053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.85003 0.77781 83.375 < 2e-16 ***
## FemaleIDCF01_B2 6.69911 1.16172 5.767 1.16e-08 ***
## FemaleIDCF02_B1 -4.08868 1.17447 -3.481 0.000526 ***
## FemaleIDCF02_B2 -2.11343 1.12808 -1.873 0.061369 .
## FemaleIDCF03_B1 3.99989 1.14979 3.479 0.000531 ***
## FemaleIDCF03_B2 -1.36142 1.12808 -1.207 0.227849
## FemaleIDCF04_B1 -2.97063 1.11818 -2.657 0.008050 **
## FemaleIDCF04_B2 -3.67086 1.12808 -3.254 0.001186 **
## FemaleIDCF05_B1 -4.33730 1.13860 -3.809 0.000150 ***
## FemaleIDCF05_B2 -0.97979 1.11818 -0.876 0.381164
## FemaleIDCF06_B1 -0.23605 1.17447 -0.201 0.840765
## FemaleIDCF07_B1 -3.27936 1.12808 -2.907 0.003750 **
## FemaleIDCF07_B2 -1.80777 1.11818 -1.617 0.106337
## FemaleIDCF08_B1 -0.32337 1.11818 -0.289 0.772506
## FemaleIDEF01_B1 -0.70782 1.14979 -0.616 0.538327
## FemaleIDEF01_B2 -4.63235 1.11818 -4.143 3.80e-05 ***
## FemaleIDEF02_B1 -3.09748 1.18813 -2.607 0.009304 **
## FemaleIDEF02_B2 -1.79542 1.11818 -1.606 0.108742
## FemaleIDEF03_B1 -2.42848 1.14979 -2.112 0.034988 *
## FemaleIDEF03_B2 0.07892 1.12808 0.070 0.944243
## FemaleIDEF04_B1 0.19892 1.12808 0.176 0.860079
## FemaleIDEF04_B2 0.22683 1.12808 0.201 0.840692
## FemaleIDEF05_B1 -5.07044 1.12808 -4.495 8.00e-06 ***
## FemaleIDEF05_B2 -2.31488 1.11818 -2.070 0.038753 *
## FemaleIDEF06_B1 -1.45852 1.25418 -1.163 0.245210
## FemaleIDEF06_B2 2.48721 1.13860 2.184 0.029220 *
## FemaleIDEF07_B1 -3.67532 1.59404 -2.306 0.021386 *
## FemaleIDEF07_B2 -1.31073 1.13860 -1.151 0.250007
## FemaleIDEF08_B1 -3.08252 1.12808 -2.733 0.006424 **
## FemaleIDEF08_B2 -2.20750 1.13860 -1.939 0.052881 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.4 on 796 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.2461, Adjusted R-squared: 0.2187
## F-statistic: 8.962 on 29 and 796 DF, p-value: < 2.2e-16
#look at assumptions of eggdiam 6
par(mfcol=c(2,2))
plot(eggdiam6)
par(mfcol=c(1,1))
acf(eggdiam6)
## Error in complete.cases(object): not all arguments have the same length
#FemaleID matters but not block, treatment or adult length.
plot(EggDiamum~FemaleID, data=CalEggs) #we need to account for FemaleID, but not egg size in Larvae models
#now look at the response variable EggAreaum2
eggarea1<- lmer(EggAreaum2~WCa_pHDIC*AdultLength + (1|FemaleID) + (1|Block) +(1|AccTankID), data=CalEggs)
#Check assumptions
qqnorm(resid(eggarea1))
qqline(resid(eggarea1)) #definitely doesn't meet assumption of normality
plot(eggarea1)
acf(eggarea1)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(eggarea1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: EggAreaum2 ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 |
## Block) + (1 | AccTankID)
## Data: CalEggs
##
## REML criterion at convergence: 13252.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0997 -0.3203 0.0693 0.4951 3.3492
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 57841 240.50
## AccTankID (Intercept) 0 0.00
## Block (Intercept) 2515 50.15
## Residual 181508 426.04
## Number of obs: 885, groups: FemaleID, 30; AccTankID, 6; Block, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2349.30 438.23 25.68 5.361 1.35e-05 ***
## WCa_pHDIC 537.70 420.45 25.04 1.279 0.213
## AdultLength 108.69 135.43 25.08 0.803 0.430
## WCa_pHDIC:AdultLength -114.35 126.94 25.10 -0.901 0.376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC AdltLn
## WCa_pHDIC -0.882
## AdultLength -0.973 0.852
## WC_pHDIC:AL 0.873 -0.970 -0.890
#try simplifying the model; step function gives me errors, so I ended up doing this manually
eggarea2<- lmer(EggAreaum2~WCa_pHDIC+ (1|FemaleID) + (1|Block) , data=CalEggs)
anova(eggarea1, eggarea2)#Choose eggarea2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## object: EggAreaum2 ~ WCa_pHDIC * AdultLength + (1 | FemaleID) + (1 |
## object: Block) + (1 | AccTankID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 5 13305 13329 -6647.6 13295
## object 8 13310 13349 -6647.2 13294 0.8548 3 0.8363
eggarea3<- lm(EggAreaum2~WCa_pHDIC, data=CalEggs)
anova(eggarea2, eggarea3)#choose eggarea2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ WCa_pHDIC
## object: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 3 13460 13475 -6727.1 13454
## object 5 13305 13329 -6647.6 13295 158.98 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eggarea4<- lmer(EggAreaum2~WCa_pHDIC + (1|FemaleID), data=CalEggs)
anova(eggarea2, eggarea4)#choose eggarea4
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID)
## object: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 4 13303 13322 -6647.6 13295
## object 5 13305 13329 -6647.6 13295 0 1 1
eggarea5<- lmer(EggAreaum2 ~ 1 + (1|FemaleID), data=CalEggs)
anova(eggarea4, eggarea5) #choose eggarea5
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggAreaum2 ~ 1 + (1 | FemaleID)
## object: EggAreaum2 ~ WCa_pHDIC + (1 | FemaleID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 3 13304 13318 -6649.1 13298
## object 4 13303 13322 -6647.6 13295 2.9337 1 0.08675 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggarea5)
## summary from lme4 is returned
## some computational error has occurred in lmerTest
## Linear mixed model fit by REML ['lmerMod']
## Formula: EggAreaum2 ~ 1 + (1 | FemaleID)
## Data: CalEggs
##
## REML criterion at convergence: 13288.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.1290 -0.3200 0.0691 0.4909 3.3196
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 60286 245.5
## Residual 181520 426.1
## Number of obs: 885, groups: FemaleID, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2829.39 47.12 60.04
eggarea6<- lm(EggAreaum2~FemaleID, data=CalEggs)
anova(eggarea3, eggarea6)
## Analysis of Variance Table
##
## Model 1: EggAreaum2 ~ WCa_pHDIC
## Model 2: EggAreaum2 ~ FemaleID
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 883 207413205
## 2 855 155205895 28 52207310 10.271 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggarea6)
##
## Call:
## lm(formula = EggAreaum2 ~ FemaleID, data = CalEggs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2612.56 -130.77 23.57 203.23 1413.03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2991.01 75.32 39.712 < 2e-16 ***
## FemaleIDCF01_B2 618.54 108.28 5.713 1.54e-08 ***
## FemaleIDCF02_B1 -602.67 107.37 -5.613 2.69e-08 ***
## FemaleIDCF02_B2 -157.33 108.28 -1.453 0.146571
## FemaleIDCF03_B1 260.71 108.28 2.408 0.016258 *
## FemaleIDCF03_B2 -82.04 108.28 -0.758 0.448818
## FemaleIDCF04_B1 -176.67 108.28 -1.632 0.103127
## FemaleIDCF04_B2 -259.31 108.28 -2.395 0.016838 *
## FemaleIDCF05_B1 -366.42 107.37 -3.413 0.000674 ***
## FemaleIDCF05_B2 27.31 108.28 0.252 0.800957
## FemaleIDCF06_B1 74.55 107.37 0.694 0.487682
## FemaleIDCF07_B1 -170.92 109.24 -1.565 0.118015
## FemaleIDCF07_B2 -198.06 108.28 -1.829 0.067718 .
## FemaleIDCF08_B1 -75.27 108.28 -0.695 0.487118
## FemaleIDEF01_B1 -147.38 108.28 -1.361 0.173816
## FemaleIDEF01_B2 -422.48 108.28 -3.902 0.000103 ***
## FemaleIDEF02_B1 -558.53 108.28 -5.158 3.10e-07 ***
## FemaleIDEF02_B2 -274.89 108.28 -2.539 0.011300 *
## FemaleIDEF03_B1 -260.03 108.28 -2.402 0.016537 *
## FemaleIDEF03_B2 -254.46 108.28 -2.350 0.018995 *
## FemaleIDEF04_B1 -61.35 108.28 -0.567 0.571110
## FemaleIDEF04_B2 -14.22 108.28 -0.131 0.895536
## FemaleIDEF05_B1 -425.09 108.28 -3.926 9.33e-05 ***
## FemaleIDEF05_B2 -167.23 108.28 -1.544 0.122839
## FemaleIDEF06_B1 -417.78 108.28 -3.858 0.000123 ***
## FemaleIDEF06_B2 331.26 108.28 3.059 0.002287 **
## FemaleIDEF07_B1 -399.81 148.91 -2.685 0.007396 **
## FemaleIDEF07_B2 -194.25 108.28 -1.794 0.073158 .
## FemaleIDEF08_B1 -151.80 108.28 -1.402 0.161272
## FemaleIDEF08_B2 -355.12 108.28 -3.280 0.001081 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 426.1 on 855 degrees of freedom
## Multiple R-squared: 0.2689, Adjusted R-squared: 0.2441
## F-statistic: 10.85 on 29 and 855 DF, p-value: < 2.2e-16
#look at assumptions of eggarea6
par(mfcol=c(2,2))
plot(eggarea6)
par(mfcol=c(1,1))
acf(eggarea6)
## Error in complete.cases(object): not all arguments have the same length
#FemaleID matters but not block, treatment or adult length. Same as diameter, but model doesn't seem to meet the assumption of normality
plot(EggAreaum2~FemaleID, data=CalEggs)
#look at EggEccentricity
eggeccen1<- lmer(EggEccentricity~WCa_pHDIC*AdultLength + (1|FemaleID) + (1|Block) +(1|AccTankID), data=CalEggs)
#Check assumptions
qqnorm(resid(eggeccen1))
qqline(resid(eggeccen1)) #looks good!
plot(eggeccen1) #this looks good too
acf(eggeccen1)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(eggeccen1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: EggEccentricity ~ WCa_pHDIC * AdultLength + (1 | FemaleID) +
## (1 | Block) + (1 | AccTankID)
## Data: CalEggs
##
## REML criterion at convergence: -934.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.64607 -0.65973 -0.06525 0.62934 2.94875
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 0.0032377 0.05690
## AccTankID (Intercept) 0.0000000 0.00000
## Block (Intercept) 0.0003786 0.01946
## Residual 0.0187144 0.13680
## Number of obs: 885, groups: FemaleID, 30; AccTankID, 6; Block, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.46654 0.10828 25.84800 4.309 0.00021 ***
## WCa_pHDIC -0.11089 0.10339 24.99100 -1.073 0.29374
## AdultLength -0.01820 0.03331 25.05700 -0.546 0.58961
## WCa_pHDIC:AdultLength 0.02071 0.03122 25.04300 0.663 0.51329
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC AdltLn
## WCa_pHDIC -0.877
## AdultLength -0.968 0.852
## WC_pHDIC:AL 0.869 -0.970 -0.890
#try simplifying the model; step function gives me errors, so I ended up doing this manually
eggeccen2<- lmer(EggEccentricity~WCa_pHDIC+ (1|FemaleID) + (1|Block) , data=CalEggs)
anova(eggeccen1, eggeccen2)#Choose eggeccen2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## object: EggEccentricity ~ WCa_pHDIC * AdultLength + (1 | FemaleID) +
## object: (1 | Block) + (1 | AccTankID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 5 -947.60 -923.67 478.80 -957.60
## object 8 -942.14 -903.86 479.07 -958.14 0.5406 3 0.9099
eggeccen3<- lm(EggEccentricity~WCa_pHDIC, data=CalEggs)
anova(eggeccen2, eggeccen3)#choose eggeccen2
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ WCa_pHDIC
## object: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 3 -872.16 -857.81 439.08 -878.16
## object 5 -947.60 -923.67 478.80 -957.60 79.437 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eggeccen4<- lmer(EggEccentricity~WCa_pHDIC + (1|FemaleID), data=CalEggs)
anova(eggeccen2, eggeccen4)#choose eggeccen4
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID)
## object: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID) + (1 | Block)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 4 -949.48 -930.34 478.74 -957.48
## object 5 -947.60 -923.67 478.80 -957.60 0.123 1 0.7258
eggeccen5<- lmer(EggEccentricity ~ 1 + (1|FemaleID), data=CalEggs)
anova(eggeccen4, eggeccen5) #choose eggeccen5
## refitting model(s) with ML (instead of REML)
## Data: CalEggs
## Models:
## ..1: EggEccentricity ~ 1 + (1 | FemaleID)
## object: EggEccentricity ~ WCa_pHDIC + (1 | FemaleID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 3 -948.42 -934.06 477.21 -954.42
## object 4 -949.48 -930.34 478.74 -957.48 3.0635 1 0.08007 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggeccen5)
## summary from lme4 is returned
## some computational error has occurred in lmerTest
## Linear mixed model fit by REML ['lmerMod']
## Formula: EggEccentricity ~ 1 + (1 | FemaleID)
## Data: CalEggs
##
## REML criterion at convergence: -947.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.68207 -0.67048 -0.06141 0.63147 2.94534
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 0.003507 0.05922
## Residual 0.018714 0.13680
## Number of obs: 885, groups: FemaleID, 30
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.37252 0.01177 31.65
eggeccen6<- lm(EggEccentricity~FemaleID, data=CalEggs)
anova(eggeccen3, eggeccen6)
## Analysis of Variance Table
##
## Model 1: EggEccentricity ~ WCa_pHDIC
## Model 2: EggEccentricity ~ FemaleID
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 883 19.210
## 2 855 16.003 28 3.2065 6.1183 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(eggeccen6)
##
## Call:
## lm(formula = EggEccentricity ~ FemaleID, data = CalEggs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37648 -0.09290 -0.00824 0.08657 0.39311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.335311 0.024185 13.864 < 2e-16 ***
## FemaleIDCF01_B2 0.121992 0.034768 3.509 0.000474 ***
## FemaleIDCF02_B1 -0.055068 0.034478 -1.597 0.110591
## FemaleIDCF02_B2 0.034152 0.034768 0.982 0.326245
## FemaleIDCF03_B1 0.099931 0.034768 2.874 0.004151 **
## FemaleIDCF03_B2 0.042536 0.034768 1.223 0.221511
## FemaleIDCF04_B1 -0.028397 0.034768 -0.817 0.414300
## FemaleIDCF04_B2 -0.049178 0.034768 -1.414 0.157596
## FemaleIDCF05_B1 -0.064377 0.034478 -1.867 0.062215 .
## FemaleIDCF05_B2 -0.050705 0.034768 -1.458 0.145107
## FemaleIDCF06_B1 0.102429 0.034478 2.971 0.003053 **
## FemaleIDCF07_B1 -0.050701 0.035076 -1.445 0.148699
## FemaleIDCF07_B2 0.050535 0.034768 1.453 0.146458
## FemaleIDCF08_B1 0.041736 0.034768 1.200 0.230317
## FemaleIDEF01_B1 -0.003685 0.034768 -0.106 0.915607
## FemaleIDEF01_B2 0.053702 0.034768 1.545 0.122822
## FemaleIDEF02_B1 0.102168 0.034768 2.939 0.003387 **
## FemaleIDEF02_B2 0.113973 0.034768 3.278 0.001087 **
## FemaleIDEF03_B1 0.068439 0.034768 1.968 0.049343 *
## FemaleIDEF03_B2 0.129794 0.034768 3.733 0.000202 ***
## FemaleIDEF04_B1 0.051040 0.034768 1.468 0.142470
## FemaleIDEF04_B2 0.028669 0.034768 0.825 0.409843
## FemaleIDEF05_B1 -0.048820 0.034768 -1.404 0.160635
## FemaleIDEF05_B2 0.016992 0.034768 0.489 0.625162
## FemaleIDEF06_B1 0.116906 0.034768 3.362 0.000807 ***
## FemaleIDEF06_B2 0.040295 0.034768 1.159 0.246804
## FemaleIDEF07_B1 0.009502 0.047818 0.199 0.842532
## FemaleIDEF07_B2 0.140033 0.034768 4.028 6.14e-05 ***
## FemaleIDEF08_B1 -0.016346 0.034768 -0.470 0.638376
## FemaleIDEF08_B2 0.113611 0.034768 3.268 0.001128 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1368 on 855 degrees of freedom
## Multiple R-squared: 0.1827, Adjusted R-squared: 0.155
## F-statistic: 6.591 on 29 and 855 DF, p-value: < 2.2e-16
#look at assumptions of eggeccen6
par(mfcol=c(2,2))
plot(eggeccen6)
par(mfcol=c(1,1))
acf(eggeccen6)
## Error in complete.cases(object): not all arguments have the same length
#FemaleID matters but not block, treatment or adult length. Same as diameter
plot(EggEccentricity~FemaleID, data=CalEggs)
Look at the water chemistry data for Jars
#start with omega calcite
plot(JarOmegaCalcite~JarTrt, data=LarvByJarDat)
#remove the Jars that have their pH as 0/NA
CalDat<- subset(LarvByJarDat, JarOmegaCalcite!= "NA")
#I was having trouble with making this model: JarOmegaCalcite~JarTrt*ParentTrt*TimeFilter*BlockID, so I looked at TimeFilter because I think that is the problem.
Jar1<- lm(JarOmegaCalcite~TimeFilter, data=CalDat)
par(mfcol=c(2,2))
plot(Jar1)
## Warning: not plotting observations with leverage one:
## 1, 4, 5, 6, 7, 8, 13, 15, 17, 18, 22, 23, 29, 34, 36, 48, 49, 52, 53, 57, 58, 62, 64, 66, 90, 91, 94, 95, 100, 103, 111, 126, 135, 141, 142, 150, 153, 161, 164, 168, 171, 178, 182, 186, 193, 195, 199, 215, 225, 226, 241, 247, 253, 257, 258, 266, 269, 270, 271, 277, 282, 283, 294, 297, 298, 302, 312, 313, 314, 316, 317, 318, 319, 322, 323, 326, 334, 342, 345, 349, 352, 357, 364, 367, 372, 374, 375, 380
## Warning: not plotting observations with leverage one:
## 1, 4, 5, 6, 7, 8, 13, 15, 17, 18, 22, 23, 29, 34, 36, 48, 49, 52, 53, 57, 58, 62, 64, 66, 90, 91, 94, 95, 100, 103, 111, 126, 135, 141, 142, 150, 153, 161, 164, 168, 171, 178, 182, 186, 193, 195, 199, 215, 225, 226, 241, 247, 253, 257, 258, 266, 269, 270, 271, 277, 282, 283, 294, 297, 298, 302, 312, 313, 314, 316, 317, 318, 319, 322, 323, 326, 334, 342, 345, 349, 352, 357, 364, 367, 372, 374, 375, 380
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
par(mfcol=c(1,1))
#looks like we can't use timefilter because it is too specific to each jar. Get the error message "observations with leverage one"; try using seatable instead
Jar2<- lmer(JarOmegaCalcite~JarTrt*JarSeatable + (1|Block), data=CalDat)
#check assumptions
plot(Jar2)
qqnorm(resid(Jar2))
qqline(resid(Jar2))
#looks good to me.
Jar2step<-step(Jar2)
print(Jar2step)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## Block 1324.18 1 kept < 1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value elim.num
## JarTrt 48.5911 48.5911 1 374.71 275385.9221 kept
## JarSeatable 0.0020 0.0010 2 374.70 5.5659 kept
## JarTrt:JarSeatable 0.0013 0.0007 2 374.70 3.6907 kept
## Pr(>F)
## JarTrt <1e-07
## JarSeatable 0.0041
## JarTrt:JarSeatable 0.0259
##
## Least squares means:
## JarTrt JarSeatable Estimate Standard Error
## JarTrt Control 1.0 NA 1.182 0.096
## JarTrt Exposed 2.0 NA 0.334 0.096
## JarSeatable 2 NA 1.0 0.755 0.096
## JarSeatable 4 NA 2.0 0.758 0.096
## JarSeatable 6 NA 3.0 0.761 0.096
## JarTrt:JarSeatable Control 2 1.0 1.0 1.177 0.096
## JarTrt:JarSeatable Exposed 2 2.0 1.0 0.333 0.096
## JarTrt:JarSeatable Control 4 1.0 2.0 1.182 0.096
## JarTrt:JarSeatable Exposed 4 2.0 2.0 0.334 0.096
## JarTrt:JarSeatable Control 6 1.0 3.0 1.188 0.096
## JarTrt:JarSeatable Exposed 6 2.0 3.0 0.334 0.096
## DF t-value Lower CI Upper CI p-value
## JarTrt Control 2.9 12.32 0.8699 1.495 0.0014 **
## JarTrt Exposed 2.9 3.48 0.0213 0.646 0.0427 *
## JarSeatable 2 2.9 7.87 0.4429 1.068 0.0049 **
## JarSeatable 4 2.9 7.89 0.4452 1.070 0.0049 **
## JarSeatable 6 2.9 7.93 0.4487 1.074 0.0048 **
## JarTrt:JarSeatable Control 2 2.9 12.26 0.8650 1.490 0.0014 **
## JarTrt:JarSeatable Exposed 2 2.9 3.47 0.0209 0.646 0.0428 *
## JarTrt:JarSeatable Control 4 2.9 12.31 0.8694 1.494 0.0014 **
## JarTrt:JarSeatable Exposed 4 2.9 3.47 0.0211 0.646 0.0427 *
## JarTrt:JarSeatable Control 6 2.9 12.37 0.8755 1.500 0.0014 **
## JarTrt:JarSeatable Exposed 6 2.9 3.48 0.0219 0.647 0.0424 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Differences of LSMEANS:
## Estimate Standard Error DF
## JarTrt Control - Exposed 0.8 0.0016 374.7
## JarSeatable 2 - 4 0.0 0.0017 374.7
## JarSeatable 2 - 6 0.0 0.0018 374.7
## JarSeatable 4 - 6 0.0 0.0017 374.7
## JarTrt:JarSeatable Control 2 - Exposed 2 0.8 0.0026 374.7
## JarTrt:JarSeatable Control 2 - Control 4 0.0 0.0028 374.7
## JarTrt:JarSeatable Control 2 - Exposed 4 0.8 0.0026 374.7
## JarTrt:JarSeatable Control 2 - Control 6 0.0 0.0028 374.7
## JarTrt:JarSeatable Control 2 - Exposed 6 0.8 0.0026 374.7
## JarTrt:JarSeatable Exposed 2 - Control 4 -0.8 0.0026 374.7
## JarTrt:JarSeatable Exposed 2 - Exposed 4 0.0 0.0021 374.7
## JarTrt:JarSeatable Exposed 2 - Control 6 -0.9 0.0026 374.7
## JarTrt:JarSeatable Exposed 2 - Exposed 6 0.0 0.0021 374.7
## JarTrt:JarSeatable Control 4 - Exposed 4 0.8 0.0026 374.7
## JarTrt:JarSeatable Control 4 - Control 6 0.0 0.0028 374.7
## JarTrt:JarSeatable Control 4 - Exposed 6 0.8 0.0026 374.7
## JarTrt:JarSeatable Exposed 4 - Control 6 -0.9 0.0026 374.7
## JarTrt:JarSeatable Exposed 4 - Exposed 6 0.0 0.0021 374.7
## JarTrt:JarSeatable Control 6 - Exposed 6 0.9 0.0026 374.7
## t-value Lower CI Upper CI
## JarTrt Control - Exposed 524.77 0.8454 0.8518
## JarSeatable 2 - 4 -1.35 -0.0058 0.0011
## JarSeatable 2 - 6 -3.32 -0.0093 -0.0024
## JarSeatable 4 - 6 -1.98 -0.0069 0.0000
## JarTrt:JarSeatable Control 2 - Exposed 2 327.04 0.8390 0.8491
## JarTrt:JarSeatable Control 2 - Control 4 -1.57 -0.0099 0.0011
## JarTrt:JarSeatable Control 2 - Exposed 4 328.88 0.8387 0.8488
## JarTrt:JarSeatable Control 2 - Control 6 -3.75 -0.0161 -0.0050
## JarTrt:JarSeatable Control 2 - Exposed 6 325.63 0.8379 0.8481
## JarTrt:JarSeatable Exposed 2 - Control 4 -328.75 -0.8535 -0.8434
## JarTrt:JarSeatable Exposed 2 - Exposed 4 -0.14 -0.0043 0.0038
## JarTrt:JarSeatable Exposed 2 - Control 6 -328.94 -0.8597 -0.8495
## JarTrt:JarSeatable Exposed 2 - Exposed 6 -0.51 -0.0052 0.0030
## JarTrt:JarSeatable Control 4 - Exposed 4 330.60 0.8431 0.8532
## JarTrt:JarSeatable Control 4 - Control 6 -2.18 -0.0117 -0.0006
## JarTrt:JarSeatable Control 4 - Exposed 6 327.33 0.8423 0.8525
## JarTrt:JarSeatable Exposed 4 - Control 6 -330.77 -0.8594 -0.8492
## JarTrt:JarSeatable Exposed 4 - Exposed 6 -0.38 -0.0048 0.0033
## JarTrt:JarSeatable Control 6 - Exposed 6 327.53 0.8484 0.8587
## p-value
## JarTrt Control - Exposed <2e-16 ***
## JarSeatable 2 - 4 0.178
## JarSeatable 2 - 6 0.001 ***
## JarSeatable 4 - 6 0.048 *
## JarTrt:JarSeatable Control 2 - Exposed 2 <2e-16 ***
## JarTrt:JarSeatable Control 2 - Control 4 0.117
## JarTrt:JarSeatable Control 2 - Exposed 4 <2e-16 ***
## JarTrt:JarSeatable Control 2 - Control 6 2e-04 ***
## JarTrt:JarSeatable Control 2 - Exposed 6 <2e-16 ***
## JarTrt:JarSeatable Exposed 2 - Control 4 <2e-16 ***
## JarTrt:JarSeatable Exposed 2 - Exposed 4 0.890
## JarTrt:JarSeatable Exposed 2 - Control 6 <2e-16 ***
## JarTrt:JarSeatable Exposed 2 - Exposed 6 0.609
## JarTrt:JarSeatable Control 4 - Exposed 4 <2e-16 ***
## JarTrt:JarSeatable Control 4 - Control 6 0.030 *
## JarTrt:JarSeatable Control 4 - Exposed 6 <2e-16 ***
## JarTrt:JarSeatable Exposed 4 - Control 6 <2e-16 ***
## JarTrt:JarSeatable Exposed 4 - Exposed 6 0.707
## JarTrt:JarSeatable Control 6 - Exposed 6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Final model:
## lme4::lmer(formula = JarOmegaCalcite ~ JarTrt * JarSeatable +
## (1 | Block), data = CalDat, contrasts = list(JarTrt = "contr.SAS",
## JarSeatable = "contr.SAS"))
#final model chosen: JarpHCorrSW~JarTrt*JarSeatable + (1|Block)
Jar3<- lmer(JarOmegaCalcite~JarTrt*JarSeatable + (1|Block), data=CalDat)
#check assumptions
qqnorm(resid(Jar3))
qqline(resid(Jar3))
plot(Jar3) #may want to look at this further
summary(Jar3)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: JarOmegaCalcite ~ JarTrt * JarSeatable + (1 | Block)
## Data: CalDat
##
## REML criterion at convergence: -2142.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.1968 -0.0456 0.0129 0.0345 14.2111
##
## Random effects:
## Groups Name Variance Std.Dev.
## Block (Intercept) 0.0184273 0.13575
## Residual 0.0001764 0.01328
## Number of obs: 381, groups: Block, 2
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 1.177333 0.096012 2.500000 12.262
## JarTrtExposed -0.844042 0.002581 374.700000 -327.042
## JarSeatable4 0.004403 0.002800 374.700000 1.572
## JarSeatable6 0.010553 0.002816 374.700000 3.747
## JarTrtExposed:JarSeatable4 -0.004117 0.003478 374.700000 -1.184
## JarTrtExposed:JarSeatable6 -0.009489 0.003502 374.700000 -2.710
## Pr(>|t|)
## (Intercept) 0.002503 **
## JarTrtExposed < 2e-16 ***
## JarSeatable4 0.116690
## JarSeatable6 0.000207 ***
## JarTrtExposed:JarSeatable4 0.237239
## JarTrtExposed:JarSeatable6 0.007038 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) JrTrtE JrStb4 JrStb6 JTE:JS4
## JarTrtExpsd -0.018
## JarSeatabl4 -0.015 0.543
## JarSeatabl6 -0.015 0.539 0.497
## JrTrtEx:JS4 0.012 -0.678 -0.805 -0.400
## JrTrtEx:JS6 0.012 -0.671 -0.400 -0.804 0.499
#JarTrt and JarSeatable are both significant for JarpH and there is a significant interaction, block is also a significant but I don't know what to do about only having Exposed jars from Block2. We were expecting JarTrt, but not JarSeatable.
Visualize the data for larvae morphology using the subset data
ggplot(LarvaeDat, aes(x=LarvaeDiamum, color=JarTrt))+
geom_histogram(alpha=0.5, position="identity") +
facet_grid(ParentTrt~Block)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#interesting bimodality in B2
larvmeans<- aggregate(LarvaeDiamum~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)#CP/CL : 75.24389; EP/CL: 75.46671; CP/EL: 65.47536; EP/EL: 66.48770
larvmedians<- aggregate(LarvaeDiamum~ParentTrt*JarTrt, data=LarvaeDat, FUN=median)#CP/CL : 75.29579; EP/CL: 75.52886; CP/EL: 65.27141; EP/EL: 66.86843
#I think we can use the mean for these data
Look at the larvae diameter data
#create most complex model and then use the step function to see which model is best fit for the data; these are for ParentTrt and JarTrt as covariates. I decided to use pH since we have that for the all of the bottles (once we get the salinity to use) and the tanks.
larvlen<-lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block), data=LarvaeDat)
steppedlarvlen<-step(larvlen)
print(steppedlarvlen)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## TrtTankID 0.00 1 1 1.0000
## AccTankID 0.00 1 2 1.0000
## Block 0.00 1 3 1.0000
## CrossID 1.62 1 4 0.2028
## FemaleID 107.79 1 kept <1e-07
## MaleID 114.97 1 kept <1e-07
## JarSeatable 4.75 1 kept 0.0293
## JarID 608.68 1 kept <1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value
## WCa_pHDIC 3.3920 3.3920 1 37.16 0.3653
## JarOmegaCalcite 2192.1164 2192.1164 1 312.74 236.0772
## WCa_pHDIC:JarOmegaCalcite 50.0248 50.0248 1 312.69 5.3874
## elim.num Pr(>F)
## WCa_pHDIC kept 0.5492
## JarOmegaCalcite kept <1e-07
## WCa_pHDIC:JarOmegaCalcite kept 0.0209
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = LarvaeDiamum ~ WCa_pHDIC + JarOmegaCalcite +
## (1 | FemaleID) + (1 | MaleID) + (1 | JarSeatable) + (1 |
## JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDat)
#final model chosen by the lme4 step function: y~WCa_pHDIC*JarOmegaCalcite +(1|FemaleID)+(1|MaleID)+(1|JarID) + (1|JarSeatable)
larvlenfin<- lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|JarSeatable)+(1|JarID), data=LarvaeDat)
#check the final model to see that it meets assumptions
plot(larvlenfin) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvlenfin))
qqline(resid(larvlenfin)) #has relatively long tails, doesn't seem to meet assumption of normality. Check with Katie
summary(larvlenfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +
## (1 | MaleID) + (1 | JarSeatable) + (1 | JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: 19795.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.4932 -0.5235 0.0475 0.5853 4.5079
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 4.0066 2.0017
## FemaleID (Intercept) 6.1849 2.4869
## MaleID (Intercept) 6.5666 2.5625
## JarSeatable (Intercept) 0.1607 0.4008
## Residual 9.2856 3.0472
## Number of obs: 3758, groups:
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 62.8701 1.5756 36.2000 39.902 <2e-16
## WCa_pHDIC -1.0075 1.6669 37.1600 -0.604 0.5492
## JarOmegaCalcite 10.8540 0.7064 312.7400 15.365 <2e-16
## WCa_pHDIC:JarOmegaCalcite 1.7927 0.7724 312.6900 2.321 0.0209
##
## (Intercept) ***
## WCa_pHDIC
## JarOmegaCalcite ***
## WCa_pHDIC:JarOmegaCalcite *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC -0.866
## JarOmegClct -0.243 0.221
## WC_HDIC:JOC 0.215 -0.246 -0.893
#signficant effect of Jar calcite sat, and sig. interaction between parent and Jar saturation states
#look at autocorrelation.
acf(resid(larvlenfin)) #this looks good
#predit some of the data
LarvaeDat$DiamPredict<-predict(larvlenfin)
Do the Larvae analysis without Block2
#Try without B2 data
larvlenB1<-lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID), data=LarvaeDatB1)
steppedlarvlenB1<-step(larvlenB1)
print(steppedlarvlenB1)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## TrtTankID 0.00 1 1 1.0000
## AccTankID 0.70 1 2 0.4035
## CrossID 2.07 1 3 0.1507
## FemaleID 34.48 1 kept <1e-07
## MaleID 12.38 1 kept 0.0004
## JarSeatable 6.19 1 kept 0.0128
## JarID 192.38 1 kept <1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value
## WCa_pHDIC 54.3154 54.3154 1 24.34 7.0288
## JarOmegaCalcite 3719.8750 3719.8750 1 240.71 481.3803
## WCa_pHDIC:JarOmegaCalcite 86.7777 86.7777 1 240.67 11.2297
## elim.num Pr(>F)
## WCa_pHDIC kept 0.0139
## JarOmegaCalcite kept <1e-07
## WCa_pHDIC:JarOmegaCalcite kept 0.0009
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = LarvaeDiamum ~ WCa_pHDIC + JarOmegaCalcite +
## (1 | FemaleID) + (1 | MaleID) + (1 | JarSeatable) + (1 |
## JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDatB1)
larvlenB1fin<- lmer(LarvaeDiamum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|JarID), data=LarvaeDatB1)
#this is the same final model as with both blocks
#check the final model to see that it meets assumptions
plot(larvlenB1fin) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvlenB1fin))
qqline(resid(larvlenB1fin)) #has relatively long tails, Katie, what do you think?
#look at autocorrelation.
acf(resid(larvlenB1fin)) #this looks good
summary(larvlenB1fin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +
## (1 | MaleID) + (1 | JarID)
## Data: LarvaeDatB1
##
## REML criterion at convergence: 13527.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8586 -0.5448 0.0468 0.6013 4.8986
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 1.7328 1.3163
## FemaleID (Intercept) 0.7665 0.8755
## MaleID (Intercept) 0.3359 0.5796
## Residual 7.7275 2.7798
## Number of obs: 2698, groups: JarID, 270; FemaleID, 15; MaleID, 11
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 63.7253 0.7366 25.3300 86.516 < 2e-16
## WCa_pHDIC -2.1639 0.8177 24.4900 -2.646 0.01400
## JarOmegaCalcite 10.8516 0.5041 242.6900 21.527 < 2e-16
## WCa_pHDIC:JarOmegaCalcite 1.8018 0.5511 242.6600 3.270 0.00123
##
## (Intercept) ***
## WCa_pHDIC *
## JarOmegaCalcite ***
## WCa_pHDIC:JarOmegaCalcite **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC -0.889
## JarOmegClct -0.453 0.398
## WC_HDIC:JOC 0.404 -0.444 -0.893
#if we just look at block 1, both main effects are significant and their interaction.
Visualize the larvae length data using continuous variables
#plot the data using the means for each jar
ggplot(LarvaeDat,
aes(fill = WCa_pHDIC, y = LarvaeDiamum, x = JarOmegaCalcite)) +
geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
ggtitle("Larvae diameter by larvae jar saturation state")+
ylab("Larvae Diameter (um)")+
xlab("Larvae Jar Calcite Saturation State")+
theme_classic()
#the reason there is spread is that for the jars that we actually measured carbonate dynamics I used those values, for all other jars, I used the average of those measurements
#Try to visualize it in a more pleasant way
#get means and SE of larvae diameters
meanlarv<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(mean, na.rm=T))
selarv<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(se, na.rm=T))
#plot the mean Larvae Diameter +/- SE
ggplot(data = meanlarv, aes(x = JarOmegaCalcite, y = LarvaeDiamum, group=ParentTrt, fill=ParentTrt)) +
geom_errorbar(aes(ymin= meanlarv$LarvaeDiamum-selarv$LarvaeDiamum, ymax=meanlarv$LarvaeDiamum+selarv$LarvaeDiamum),width=0.05, position=position_dodge(0))+
geom_errorbarh(aes(xmin= meanlarv$JarOmegaCalcite-selarv$JarOmegaCalcite, xmax=meanlarv$JarOmegaCalcite+selarv$JarOmegaCalcite),width=0.05, position=position_dodge(0))+
geom_point(aes(colour=ParentTrt))+
labs(x = "Larvae Jar Saturation State (Calcite)", y = "Larvae Diameter (um)") +
scale_color_manual(values = c("skyblue2", "red2")) +
theme_classic()
## Warning: Ignoring unknown parameters: width
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
#should we add a model line to this?
#alternatively, I could visualize with bargraphs:
ggplot(data = meanlarv, aes(x = JarTrt, y = LarvaeDiamum, fill=ParentTrt)) +
geom_bar(stat="identity", aes(fill=ParentTrt), position="dodge")+
labs(x = "Larvae Treatment", y = "Larvae Diameter (um)") +
scale_fill_manual(name = "Parent Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meanlarv$LarvaeDiamum-selarv$LarvaeDiamum, ymax=meanlarv$LarvaeDiamum+selarv$LarvaeDiamum),width=0.2, position=position_dodge(.9))+
theme_classic()
#plot with parent treatment on the x-axis
ggplot(data = meanlarv, aes(x = ParentTrt , y = LarvaeDiamum, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Parent Treatment", y = "Larvae Diameter (um)") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meanlarv$LarvaeDiamum-selarv$LarvaeDiamum, ymax=meanlarv$LarvaeDiamum+selarv$LarvaeDiamum),width=0.2, position=position_dodge(.9))+
theme_classic()
#now plot the difference between parental treatments for larvae diameter
crossdiff <- data.frame(tapply(LarvaeDat$LarvaeDiamum, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
rownames_to_column("CrossID") %>%
mutate(CrossTemp = CrossID) %>%
mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
crossdiff$CrossID<- as.factor(crossdiff$CrossID)
crossdiff$FemaleID<-as.factor(crossdiff$FemaleID)
crossdiff$MaleID<-as.factor(crossdiff$MaleID)
#look at cross differences by Male and Female ID
ggplot(data = crossdiff, aes(x = MaleID, y = Diff)) +
geom_point(aes(colour=FemaleID)) +
labs(x = "Male ID", y = "Change in mean shell length (um) per family\nfrom control to OA conditions") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).
#get the means of the differences to visualize overall differences
meandiff<- aggregate(Diff~ParentTrt, data=crossdiff, FUN=mean)
sediff<- aggregate(Diff~ParentTrt, data=crossdiff, FUN=se)
ggplot(data = meandiff, aes(x = ParentTrt, y = Diff)) +
geom_bar(stat="identity", aes(fill="gray45"), position="dodge") +
labs(x = "Parental Treatment", y = "Change in mean shell length (um) per family\nfrom control to OA conditions") +
scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA Exposed")) +
scale_fill_manual(values = "gray45") +
geom_errorbar(aes(ymin= meandiff$Diff-sediff$Diff, ymax=meandiff$Diff+sediff$Diff),width=0.2, position=position_dodge(.9))+
theme(legend.position = "none", panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"))
diffttest<- t.test(Diff~ParentTrt, data=crossdiff, var.equal=T)
diffttest #mean shell length difference is significantly higher for OA parent treatment
##
## Two Sample t-test
##
## data: Diff by ParentTrt
## t = -2.6006, df = 43, p-value = 0.01271
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.1378060 -0.2703394
## sample estimates:
## mean in group Control mean in group Exposed
## -11.049847 -9.845775
Analyze at growth per day
larvgrow<-lmer(GrowthPerDay~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)
steppedlarvgrow<- step(larvgrow)
print(steppedlarvgrow)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## TrtTankID 0.00 1 1 1.0000
## AccTankID 0.00 1 2 1.0000
## Block 0.00 1 3 1.0000
## CrossID 1.13 1 4 0.2878
## FemaleID 174.50 1 kept <1e-07
## MaleID 118.41 1 kept <1e-07
## JarSeatable 4.85 1 kept 0.0277
## JarID 606.52 1 kept <1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value elim.num
## WCa_pHDIC 1.9298 1.9298 1 39.45 1.8705 kept
## JarOmegaCalcite 246.3220 246.3220 1 315.88 238.7510 kept
## WCa_pHDIC:JarOmegaCalcite 5.5426 5.5426 1 315.82 5.3722 kept
## Pr(>F)
## WCa_pHDIC 0.1792
## JarOmegaCalcite <1e-07
## WCa_pHDIC:JarOmegaCalcite 0.0211
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = GrowthPerDay ~ WCa_pHDIC + JarOmegaCalcite +
## (1 | FemaleID) + (1 | MaleID) + (1 | JarSeatable) + (1 |
## JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDat)
#final model chosen by the lme4 step function: y~pHSWCorr*JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+ (1|JarID)
larvgrowfin<- lmer(GrowthPerDay~WCa_pHDIC*JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+(1|JarID)+ (1|JarSeatable), data=LarvaeDat)
#check the final model to see that it meets assumptions
plot(larvgrowfin) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvgrowfin))
qqline(resid(larvgrowfin)) #has relatively long tails, doesn't seem to meet assumption of normality.
summary(larvgrowfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: GrowthPerDay ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +
## (1 | MaleID) + (1 | JarID) + (1 | JarSeatable)
## Data: LarvaeDat
##
## REML criterion at convergence: 11550.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.4955 -0.5203 0.0481 0.5852 4.5120
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 0.44078 0.6639
## FemaleID (Intercept) 0.81313 0.9017
## MaleID (Intercept) 0.79998 0.8944
## JarSeatable (Intercept) 0.01798 0.1341
## Residual 1.03171 1.0157
## Number of obs: 3758, groups:
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.1795 0.5554 38.2400 0.323 0.7484
## WCa_pHDIC -0.8050 0.5886 39.4500 -1.368 0.1792
## JarOmegaCalcite 3.6244 0.2346 315.8800 15.452 <2e-16
## WCa_pHDIC:JarOmegaCalcite 0.5944 0.2565 315.8200 2.318 0.0211
##
## (Intercept)
## WCa_pHDIC
## JarOmegaCalcite ***
## WCa_pHDIC:JarOmegaCalcite *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC -0.867
## JarOmegClct -0.229 0.208
## WC_HDIC:JOC 0.202 -0.231 -0.893
#looks like no collinearity between predictors
#look at autocorrelation.
acf(resid(larvgrowfin)) #this looks fine to me.
#significant main effect of JarOmegaCalcite and significant interaction
#plot growth per day
ggplot(LarvaeDat,
aes(fill = WCa_pHDIC, y = GrowthPerDay, x = JarOmegaCalcite)) +
geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
ggtitle("Larvae growth by parent treatment")+
ylab("Growth (um/day)")+
theme_classic()
#get means and SE of larvae growth
meangrowth<- aggregate(GrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
segrowth<- aggregate(GrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)
#use a bargraph to plot the mean Larvae Diameter +/- SE
ggplot(data = meangrowth, aes(x = ParentTrt, y = GrowthPerDay, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Parent Treatment", y = "Larvae Growth (um/day)") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meangrowth$GrowthPerDay-segrowth$GrowthPerDay, ymax=meangrowth$GrowthPerDay+segrowth$GrowthPerDay),width=0.2, position=position_dodge(.9))+
theme_classic()
#now plot the difference between parental treatments for larvae growth
growthdiff <- data.frame(tapply(LarvaeDat$GrowthPerDay, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
rownames_to_column("CrossID") %>%
mutate(CrossTemp = CrossID) %>%
mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
growthdiff$CrossID<- as.factor(growthdiff$CrossID)
growthdiff$FemaleID<-as.factor(growthdiff$FemaleID)
growthdiff$MaleID<-as.factor(growthdiff$MaleID)
#look at cross differences by Male and Female ID
ggplot(data = growthdiff, aes(x = MaleID, y = Diff)) +
geom_point(aes(colour=FemaleID)) +
labs(x = "Male ID", y = "Change in larvae growth (um/day) per family\nfrom control to OA conditions") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).
#get the means of the differences to visualize overall differences
meangrdiff<- aggregate(Diff~ParentTrt, data=growthdiff, FUN=mean)
segrdiff<- aggregate(Diff~ParentTrt, data=growthdiff, FUN=se)
ggplot(data = meangrdiff, aes(x = ParentTrt, y = Diff)) +
geom_bar(stat="identity", aes(fill="gray45"), position="dodge") +
labs(x = "Parental Treatment", y = "Change in larvae growth (um) per family\nfrom control to OA conditions") +
scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA")) +
scale_fill_manual(values = "gray45") +
geom_errorbar(aes(ymin= meangrdiff$Diff-segrdiff$Diff, ymax=meangrdiff$Diff+segrdiff$Diff),width=0.2, position=position_dodge(.9))+
theme(legend.position = "none", panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"))
diffgrttest<- t.test(Diff~ParentTrt, data=growthdiff, var.equal=T)
diffgrttest #significant effect of parent treatment on growth.
##
## Two Sample t-test
##
## data: Diff by ParentTrt
## t = -2.6006, df = 43, p-value = 0.01271
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.71260201 -0.09011314
## sample estimates:
## mean in group Control mean in group Exposed
## -3.683282 -3.281925
Look at larvae area
boxplot(LarvaeAreaum2~Block*ParentTrt, data=LarvaeDat)
#quick anova to look for potential differences
anovarea<- aov(LarvaeAreaum2~ParentTrt*JarTrt, data=LarvaeDat)
anova(anovarea)
## Analysis of Variance Table
##
## Response: LarvaeAreaum2
## Df Sum Sq Mean Sq F value Pr(>F)
## ParentTrt 1 188189 188189 1.1413 0.28545
## JarTrt 1 755776265 755776265 4583.4692 < 2e-16 ***
## ParentTrt:JarTrt 1 551198 551198 3.3428 0.06758 .
## Residuals 3754 619003638 164892
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(anovarea$residuals) #doesn't meet assumption of normality
##
## Shapiro-Wilk normality test
##
## data: anovarea$residuals
## W = 0.99594, p-value = 1.257e-08
par(mfcol=c(2,2))
plot(anovarea)
par(mfcol=c(1,1))
#create most complex model and then use the step function to see which model is best fit for the data; these are for ParentTrt and JarTrt as fixed effects not covariates
larvarea<-lmer(LarvaeAreaum2~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)
steppedlarvarea<-step(larvarea)
print(steppedlarvarea)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## TrtTankID 0.00 1 1 1.0000
## Block 0.00 1 2 1.0000
## AccTankID 0.00 1 3 1.0000
## FemaleID 23.24 1 kept 0
## MaleID 29.00 1 kept <1e-07
## CrossID 3.87 1 kept 0.0492
## JarSeatable 3.19 1 kept 0.0739
## JarID 542.13 1 kept <1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value
## WCa_pHDIC:JarOmegaCalcite 2.417751e+05 2.417751e+05 1 277.52 3.2754
## WCa_pHDIC 5.888224e+02 5.888224e+02 1 29.92 0.0080
## JarOmegaCalcite 1.434758e+08 1.434758e+08 1 278.07 1943.7314
## elim.num Pr(>F)
## WCa_pHDIC:JarOmegaCalcite 1 0.0714
## WCa_pHDIC 2 0.9294
## JarOmegaCalcite kept <1e-07
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = LarvaeAreaum2 ~ JarOmegaCalcite + (1 | FemaleID) +
## (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) + (1 | JarID),
## data = LarvaeDat)
#final model chosen by the lme4 step function: y~JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+ (1|JarSeatable)+(1|JarID)
larvareafin<- lmer(LarvaeAreaum2~JarOmegaCalcite+(1|FemaleID)+(1|MaleID)+ (1|JarSeatable)+(1|JarID), data=LarvaeDat)
#check the final model to see that it meets assumptions
plot(larvareafin) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvareafin))
qqline(resid(larvareafin)) #has relatively long tails, doesn't seem to meet assumption of normality.
summary(larvareafin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula:
## LarvaeAreaum2 ~ JarOmegaCalcite + (1 | FemaleID) + (1 | MaleID) +
## (1 | JarSeatable) + (1 | JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: 53546.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0323 -0.5695 0.0387 0.6062 4.9336
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 33966.7 184.30
## FemaleID (Intercept) 44846.0 211.77
## MaleID (Intercept) 47896.4 218.85
## JarSeatable (Intercept) 928.7 30.47
## Residual 73809.1 271.68
## Number of obs: 3758, groups:
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2548.47 66.99 35.26 38.04 <2e-16 ***
## JarOmegaCalcite 1241.41 29.13 314.93 42.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## JarOmegClct -0.239
#look at autocorrelation.
acf(resid(larvareafin)) #this looks fine to me.
#plot the data using the means for each jar
ggplot(LarvaeDat,
aes(fill = WCa_pHDIC, y = LarvaeAreaum2, x = JarOmegaCalcite)) +
geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
ggtitle("Larvae area by larvae treatment")+
ylab("Larvae Area (um2)")+
theme_classic()
#get means and SE of larvae areas
meanarea<- aggregate(LarvaeAreaum2~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
searea<- aggregate(LarvaeAreaum2~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)
#use a bargraph to plot the mean Larvae area +/- SE
ggplot(data = meanarea, aes(x = ParentTrt, y = LarvaeAreaum2, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Parent Treatment", y = "Larvae Area (um2)") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meanarea$LarvaeAreaum2-searea$LarvaeAreaum2, ymax=meanarea$LarvaeAreaum2+searea$LarvaeAreaum2),width=0.2, position=position_dodge(.9))+
theme_classic()
#now plot the difference between parental treatments for larvae area
areadiff <- data.frame(tapply(LarvaeDat$LarvaeAreaum2, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
rownames_to_column("CrossID") %>%
mutate(CrossTemp = CrossID) %>%
mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
areadiff$CrossID<- as.factor(crossdiff$CrossID)
areadiff$FemaleID<-as.factor(crossdiff$FemaleID)
areadiff$MaleID<-as.factor(crossdiff$MaleID)
#look at cross differences by Male and Female ID
ggplot(data = areadiff, aes(x = MaleID, y = Diff)) +
geom_point(aes(colour=FemaleID)) +
labs(x = "Male ID", y = "Change in mean shell area (um2) per family\nfrom control to OA conditions") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).
#get the means of the differences to visualize overall differences
meanareadiff<- aggregate(Diff~ParentTrt, data=areadiff, FUN=mean)
seareadiff<- aggregate(Diff~ParentTrt, data=areadiff, FUN=se)
ggplot(data = meanareadiff, aes(x = ParentTrt, y = Diff)) +
geom_bar(stat="identity", aes(fill="gray45"), position="dodge") +
labs(x = "Parental Treatment", y = "Change in mean shell area (um2) per family\nfrom control to OA conditions") +
scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA")) +
scale_fill_manual(values = "gray45") +
geom_errorbar(aes(ymin= meanareadiff$Diff-seareadiff$Diff, ymax=meanareadiff$Diff+seareadiff$Diff),width=0.2, position=position_dodge(.9))+
theme(legend.position = "none", panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"))
diffareattest<- t.test(Diff~ParentTrt, data=areadiff)
diffareattest #mean shell area difference is not significantly higher for OA parent treatment
##
## Welch Two Sample t-test
##
## data: Diff by ParentTrt
## t = -1.7272, df = 39.691, p-value = 0.09192
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -168.53796 13.23648
## sample estimates:
## mean in group Control mean in group Exposed
## -1093.243 -1015.592
Look at growth in terms of area
#just subtract egg area from larvae area and then divide by 3
LarvaeDat$AreaGrowthPerDay<- (LarvaeDat$LarvaeAreaum2-LarvaeDat$EggAreaum2)/3
#create LMM for growth per day
larvgrowarea<-lmer(AreaGrowthPerDay~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)
steppedlarvgrowarea<- step(larvgrowarea)
print(steppedlarvgrowarea)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## Block 0.00 1 1 1.0000
## AccTankID 0.00 1 2 1.0000
## TrtTankID 0.00 1 3 1.0000
## FemaleID 35.74 1 kept <1e-07
## MaleID 32.46 1 kept <1e-07
## CrossID 3.11 1 kept 0.0778
## JarSeatable 3.26 1 kept 0.0712
## JarID 541.97 1 kept <1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value
## WCa_pHDIC:JarOmegaCalcite 26423.4 26423.4 1 278.47 3.2217
## WCa_pHDIC 15058.6 15058.6 1 33.76 1.8361
## JarOmegaCalcite 15988833.1 15988833.1 1 279.17 1949.4845
## elim.num Pr(>F)
## WCa_pHDIC:JarOmegaCalcite 1 0.0738
## WCa_pHDIC 2 0.1844
## JarOmegaCalcite kept <1e-07
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = AreaGrowthPerDay ~ JarOmegaCalcite + (1 |
## FemaleID) + (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) +
## (1 | JarID), data = LarvaeDat)
#final model chosen by the lme4 step function: y~JarOmegaCalcite+(1|FemaleID)+(1|CrossID)+ (1|MaleID)+ (1|JarSeatable)+(1|JarID)
larvgrowareafin<- lmer(AreaGrowthPerDay~JarTrt+(1|FemaleID)+(1|MaleID)+ (1|CrossID)+ (1|JarSeatable)+(1|JarID), data=LarvaeDat)
#check the final model to see that it meets assumptions
plot(larvgrowareafin) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvgrowareafin))
qqline(resid(larvgrowareafin)) #has relatively long tails, doesn't seem to meet assumption of normality.
summary(larvgrowareafin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: AreaGrowthPerDay ~ JarTrt + (1 | FemaleID) + (1 | MaleID) + (1 |
## CrossID) + (1 | JarSeatable) + (1 | JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: 45296.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0718 -0.5668 0.0409 0.6045 4.9666
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 3432.4 58.59
## CrossID (Intercept) 572.7 23.93
## FemaleID (Intercept) 6777.6 82.33
## MaleID (Intercept) 5724.8 75.66
## JarSeatable (Intercept) 127.8 11.31
## Residual 8201.6 90.56
## Number of obs: 3758, groups:
## JarID, 377; CrossID, 81; FemaleID, 28; MaleID, 24; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 392.015 24.568 37.460 15.96 <2e-16 ***
## JarTrtExposed -351.498 7.916 281.180 -44.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## JarTrtExpsd -0.239
#look at autocorrelation.
acf(resid(larvgrowareafin)) #this looks fine to me.
#plot growth per day
ggplot(LarvaeDat,
aes(fill = WCa_pHDIC, y = AreaGrowthPerDay, x = JarOmegaCalcite)) +
geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
ggtitle("Larvae growth by larvae treatment")+
ylab("Area Growth (um/day)")+
theme_classic()
#get means and SE of larvae growth
meanareagrowth<- aggregate(AreaGrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
seareagrowth<- aggregate(AreaGrowthPerDay~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)
#use a bargraph to plot the mean Larvae area +/- SE
ggplot(data = meanareagrowth, aes(x = ParentTrt, y = AreaGrowthPerDay, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Parent Treatment", y = "Larvae Growth (um2/day)") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meanareagrowth$AreaGrowthPerDay-seareagrowth$AreaGrowthPerDay, ymax=meanareagrowth$AreaGrowthPerDay+seareagrowth$AreaGrowthPerDay),width=0.2, position=position_dodge(.9))+
theme_classic()
#now plot the difference between parental treatments for larvae growth
areagrowthdiff <- data.frame(tapply(LarvaeDat$AreaGrowthPerDay, list(LarvaeDat$CrossID, LarvaeDat$JarTrt), mean)) %>%
rownames_to_column("CrossID") %>%
mutate(CrossTemp = CrossID) %>%
mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
areagrowthdiff$CrossID<- as.factor(areagrowthdiff$CrossID)
areagrowthdiff$FemaleID<-as.factor(areagrowthdiff$FemaleID)
areagrowthdiff$MaleID<-as.factor(areagrowthdiff$MaleID)
#look at cross differences by Male and Female ID
ggplot(data = areagrowthdiff, aes(x = MaleID, y = Diff)) +
geom_point(aes(colour=FemaleID)) +
labs(x = "Male ID", y = "Change in larvae growth area (um2/day) per family\nfrom control to OA conditions") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 69 rows containing missing values (geom_point).
#get the means of the differences to visualize overall differences
meanardiff<- aggregate(Diff~ParentTrt, data=areagrowthdiff, FUN=mean)
seardiff<- aggregate(Diff~ParentTrt, data=areagrowthdiff, FUN=se)
ggplot(data = meanardiff, aes(x = ParentTrt, y = Diff)) +
geom_bar(stat="identity", aes(fill="gray45"), position="dodge") +
labs(x = "Parental Treatment", y = "Change in larvae growth area (um2) per family\nfrom control to OA conditions") +
scale_x_discrete(breaks = c("Control", "Exposed"), labels = c("Control", "OA")) +
scale_fill_manual(values = "gray45") +
geom_errorbar(aes(ymin= meanardiff$Diff-seardiff$Diff, ymax=meanardiff$Diff+seardiff$Diff),width=0.2, position=position_dodge(.9))+
theme(legend.position = "none", panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"))
diffarttest<- t.test(Diff~ParentTrt, data=areagrowthdiff)
diffarttest #significant effect of parent treatment on growth.
##
## Welch Two Sample t-test
##
## data: Diff by ParentTrt
## t = -1.7272, df = 39.691, p-value = 0.09192
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -56.179320 4.412162
## sample estimates:
## mean in group Control mean in group Exposed
## -364.4143 -338.5307
Characterize the larvae by the ratio of major and minor axis
#make a column for the ratio of major to minor axis
LarvaeDat$MajMinRat<- LarvaeDat$LarvaeMajorAxisLengthPix/LarvaeDat$LarvaeMinorAxisLengthPix
#get the mean ratio for the treatments to plot
meanrat<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(mean, na.rm=T))
serat<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(se, na.rm=T))
ggplot(data = meanrat, aes(x = ParentTrt, y = MajMinRat, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Parent Treatment", y = "Ratio of Major to Minor Axis") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meanrat$MajMinRat-serat$MajMinRat, ymax=meanrat$MajMinRat+serat$MajMinRat),width=0.2, position=position_dodge(.9))+
theme_classic()
#quick test to see if there is a significant difference
larvrat<-lmer(MajMinRat~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|JarSeatable)+(1|JarID) , data=LarvaeDat)
lmrat1<- lm(MajMinRat~WCa_pHDIC*JarOmegaCalcite, data=LarvaeDat)
plot(resid(lmrat1)~LarvaeDat$FemaleID)
#check the model to see that it meets assumptions
plot(larvrat) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvrat))
qqline(resid(larvrat)) #has relatively long tails, doesn't seem to meet assumption of normality.
summary(larvrat) #no significant differences, but interaction between parent treatment and larval treatment are both nearly significant
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: MajMinRat ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) + (1 |
## MaleID) + (1 | JarSeatable) + (1 | JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: -12932.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7873 -0.5941 -0.0385 0.5528 4.5524
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 1.269e-04 0.011266
## FemaleID (Intercept) 8.012e-05 0.008951
## MaleID (Intercept) 9.268e-06 0.003044
## JarSeatable (Intercept) 1.335e-06 0.001155
## Residual 1.736e-03 0.041667
## Number of obs: 3758, groups:
## JarID, 377; FemaleID, 28; MaleID, 24; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 1.173838 0.005218 55.800000 224.959
## WCa_pHDIC -0.010436 0.005719 59.000000 -1.825
## JarOmegaCalcite -0.008702 0.005459 333.000000 -1.594
## WCa_pHDIC:JarOmegaCalcite 0.011422 0.005971 331.900000 1.913
## Pr(>|t|)
## (Intercept) <2e-16 ***
## WCa_pHDIC 0.0731 .
## JarOmegaCalcite 0.1119
## WCa_pHDIC:JarOmegaCalcite 0.0566 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC -0.870
## JarOmegClct -0.572 0.507
## WC_HDIC:JOC 0.509 -0.571 -0.892
#look at autocorrelation.
acf(resid(larvrat)) #this looks fine to me.
Look at eccentricity
#look at the eccentricity data
hist(LarvaeDat$LarvaeEccentricity)#data are left skewed.
#get the mean eccentricity for the treatments to plot
meaneccen<- aggregate(LarvaeEccentricity~ParentTrt*JarTrt, data=LarvaeDat, FUN=mean)
seeccen<- aggregate(LarvaeEccentricity~ParentTrt*JarTrt, data=LarvaeDat, FUN=se)
ggplot(data = meaneccen, aes(x = ParentTrt, y = LarvaeEccentricity, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Parent Treatment", y = "Larvae Eccentricity") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meaneccen$LarvaeEccentricity-seeccen$LarvaeEccentricity, ymax=meaneccen$LarvaeEccentricity+seeccen$LarvaeEccentricity),width=0.2, position=position_dodge(.9))+
theme_classic()
#quick test to see if there is a significant difference
larveccen<-lmer(LarvaeEccentricity~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)
steppedlarveccen<- step(larveccen)
print(steppedlarveccen)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## TrtTankID 0.03 1 1 0.8571
## CrossID 0.13 1 2 0.7218
## JarSeatable 0.20 1 3 0.6543
## AccTankID 0.54 1 4 0.4628
## MaleID 0.56 1 5 0.4531
## Block 2.50 1 6 0.1138
## FemaleID 34.33 1 kept <1e-07
## JarID 76.42 1 kept <1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value elim.num
## WCa_pHDIC:JarOmegaCalcite 0.0093 0.0093 1 345.60 3.0753 1
## WCa_pHDIC 0.0028 0.0028 1 25.35 0.9415 2
## JarOmegaCalcite 0.0152 0.0152 1 345.45 5.0440 kept
## Pr(>F)
## WCa_pHDIC:JarOmegaCalcite 0.0804
## WCa_pHDIC 0.3411
## JarOmegaCalcite 0.0253
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = LarvaeEccentricity ~ JarOmegaCalcite + (1 |
## FemaleID) + (1 | JarID), data = LarvaeDat)
#final model only had JarTrt
larveccenfin<- lmer(LarvaeEccentricity~JarOmegaCalcite+ (1|FemaleID)+(1|JarID), data=LarvaeDat)
#check the model to see that it meets assumptions
plot(larveccenfin) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larveccenfin))
qqline(resid(larveccenfin)) #has relatively long tails, doesn't meet assumption of normality.
summary(larveccenfin) #significant effect of JarTrt
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: LarvaeEccentricity ~ JarOmegaCalcite + (1 | FemaleID) + (1 |
## JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: -10830.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.7095 -0.4842 0.0492 0.5902 3.4868
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 0.0002857 0.01690
## FemaleID (Intercept) 0.0001422 0.01192
## Residual 0.0030231 0.05498
## Number of obs: 3758, groups: JarID, 377; FemaleID, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.027e-01 3.262e-03 6.060e+01 154.110 <2e-16 ***
## JarOmegaCalcite 7.725e-03 3.440e-03 3.455e+02 2.246 0.0253 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## JarOmegClct -0.596
#look at autocorrelation.
acf(resid(larveccenfin)) #this looks fine to me.
#see if a square root transformation helps
larveccent<-lmer((LarvaeEccentricity^2)~JarOmegaCalcite+ (1|FemaleID)+(1|JarID) , data=LarvaeDat)
#check the model to see that it meets assumptions
plot(larveccent) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larveccent))
qqline(resid(larveccent)) #has relatively long tails, doesn't meet assumption of normality. transformation helped
summary(larveccent) #no significant differences
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: (LarvaeEccentricity^2) ~ JarOmegaCalcite + (1 | FemaleID) + (1 |
## JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: -11156.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.4618 -0.5548 0.0090 0.5923 3.8526
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 0.0002415 0.01554
## FemaleID (Intercept) 0.0001314 0.01146
## Residual 0.0027820 0.05274
## Number of obs: 3758, groups: JarID, 377; FemaleID, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.586e-01 3.101e-03 5.960e+01 83.40 <2e-16 ***
## JarOmegaCalcite 3.754e-03 3.235e-03 3.472e+02 1.16 0.247
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## JarOmegClct -0.589
Look at larvae perimeter
hist(LarvaeDat$LarvaePerimeterum)
#test to see if there is a significant difference
larvperim<-lmer(LarvaePerimeterum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)+(1|TrtTankID)+(1|AccTankID)+(1|Block) , data=LarvaeDat)
steppedlarvperim<- step(larvperim)
print(steppedlarvperim)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## TrtTankID 0.00 1 1 1.0000
## AccTankID 0.00 1 2 1.0000
## Block 0.00 1 3 1.0000
## FemaleID 24.69 1 kept 0
## MaleID 30.84 1 kept <1e-07
## CrossID 2.82 1 kept 0.0931
## JarSeatable 4.94 1 kept 0.0263
## JarID 555.87 1 kept <1e-07
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value
## WCa_pHDIC 13.3777 13.3777 1 34.45 0.1552
## JarOmegaCalcite 26016.3238 26016.3238 1 274.46 301.8615
## WCa_pHDIC:JarOmegaCalcite 341.4119 341.4119 1 274.81 3.9613
## elim.num Pr(>F)
## WCa_pHDIC kept 0.6960
## JarOmegaCalcite kept <1e-07
## WCa_pHDIC:JarOmegaCalcite kept 0.0475
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = LarvaePerimeterum ~ WCa_pHDIC + JarOmegaCalcite +
## (1 | FemaleID) + (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) +
## (1 | JarID) + WCa_pHDIC:JarOmegaCalcite, data = LarvaeDat)
#final model chosen by the step function: LarvaePerimeterum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID)
larvperimfin<- lmer(LarvaePerimeterum~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID), data=LarvaeDat)
#check the model to see that it meets assumptions
plot(larvperimfin) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvperimfin))
qqline(resid(larvperimfin)) #has relatively long tails, doesn't meet assumption of normality.
summary(larvperimfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula:
## LarvaePerimeterum ~ WCa_pHDIC * JarOmegaCalcite + (1 | FemaleID) +
## (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) + (1 | JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: 28176.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8942 -0.5378 0.0467 0.5950 4.8692
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 37.253 6.104
## CrossID (Intercept) 6.961 2.638
## FemaleID (Intercept) 51.519 7.178
## MaleID (Intercept) 55.727 7.465
## JarSeatable (Intercept) 1.543 1.242
## Residual 86.186 9.284
## Number of obs: 3758, groups:
## JarID, 377; CrossID, 81; FemaleID, 28; MaleID, 24; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 193.856 4.642 33.250 41.758 <2e-16 ***
## WCa_pHDIC -1.938 4.919 34.450 -0.394 0.6960
## JarOmegaCalcite 37.418 2.154 274.460 17.374 <2e-16 ***
## WCa_pHDIC:JarOmegaCalcite 4.687 2.355 274.810 1.990 0.0475 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC -0.865
## JarOmegClct -0.252 0.229
## WC_HDIC:JOC 0.222 -0.254 -0.893
#look at autocorrelation.
acf(resid(larvperimfin)) #this looks fine to me.
ggplot(LarvaeDat,
aes(fill = WCa_pHDIC, y = LarvaePerimeterum, x = JarOmegaCalcite)) +
geom_point(aes(shape= Block, colour=WCa_pHDIC)) +
ggtitle("Larvae area by larvae treatment")+
ylab("Larvae Perimeter (um)")+
theme_classic()
meanperim<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(mean, na.rm=T))
seperim<- ddply(LarvaeDat, .(JarTrt, ParentTrt), numcolwise(se, na.rm=T))
ggplot(data = meanperim, aes(x = ParentTrt, y = LarvaePerimeterum, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Parent Treatment", y = "Larvae Perimeter (um)") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meanperim$LarvaePerimeterum-seperim$LarvaePerimeterum, ymax=meanperim$LarvaePerimeterum+seperim$LarvaePerimeterum),width=0.2, position=position_dodge(.9))+
theme_classic()
#see if a square root transformation helps
larvperimfint<- lmer(sqrt(LarvaePerimeterum)~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|JarID), data=LarvaeDat)
#check the model to see that it meets assumptions
plot(larvperimfint) #seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(larvperimfint))
qqline(resid(larvperimfint)) #has relatively long tails, doesn't meet assumption of normality. #transformation didn't help
summary(larvperimfint) #significant effect of JarOmegaCalcite and signficant interaction
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: sqrt(LarvaePerimeterum) ~ WCa_pHDIC * JarOmegaCalcite + (1 |
## FemaleID) + (1 | MaleID) + (1 | CrossID) + (1 | JarSeatable) +
## (1 | JarID)
## Data: LarvaeDat
##
## REML criterion at convergence: 2943.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8139 -0.5169 0.0547 0.5830 4.7654
##
## Random effects:
## Groups Name Variance Std.Dev.
## JarID (Intercept) 0.045689 0.21375
## CrossID (Intercept) 0.007704 0.08778
## FemaleID (Intercept) 0.063435 0.25186
## MaleID (Intercept) 0.069253 0.26316
## JarSeatable (Intercept) 0.001854 0.04306
## Residual 0.103673 0.32198
## Number of obs: 3758, groups:
## JarID, 377; CrossID, 81; FemaleID, 28; MaleID, 24; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.93075 0.16302 33.25000 85.452 <2e-16
## WCa_pHDIC -0.06646 0.17270 34.46000 -0.385 0.703
## JarOmegaCalcite 1.26126 0.07529 272.81000 16.753 <2e-16
## WCa_pHDIC:JarOmegaCalcite 0.16579 0.08233 273.20000 2.014 0.045
##
## (Intercept) ***
## WCa_pHDIC
## JarOmegaCalcite ***
## WCa_pHDIC:JarOmegaCalcite *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) WC_pHDIC JrOmgC
## WCa_pHDIC -0.865
## JarOmegClct -0.251 0.228
## WC_HDIC:JOC 0.221 -0.253 -0.893
Try different type of visualization: Make plot that looks at morphology by larvae and parent treatment
par(mar = c(5, 4, 0.5, 0.5), bg = "transparent")
boxplot(LarvByJarDat$LarvaeDiamum ~ LarvByJarDat$JarTrt * LarvByJarDat$ParentTrt,
xlim = c(0, 5),
xlab = "Larvae Treatment",
ylab = "Diameter (um)",
ylim = c(0,85),
names = c("Control", "OA Exposed", "Control", "OA Exposed"),
col = c("powderblue", "red"))
rect(xleft = -0.2,
ybottom = 0,
xright = 2.5,
ytop = 85,
border = FALSE,
col = rgb(0, 0, 0.5, alpha = 0.1))
rect(xleft = 2.5,
ybottom = 0,
xright = 5.2,
ytop = 85,
border = FALSE,
col = rgb(0.5, 0, 0, alpha = 0.1))
text(x = 1.25, y = 84, "Control Parents")
text(x = 3.75, y = 84, "OA Exposed Parents")
boxplot(LarvByJarDat$LarvaeDiamum ~ LarvByJarDat$JarTrt * LarvByJarDat$ParentTrt,
xlim = c(0, 5),
xlab = "Larvae Treatment",
ylab = "Diameter (um)",
names = c("Control", "OA Exposed", "Control", "OA Exposed"),
col = c("powderblue", "red"),
add = TRUE)
par(mfcol=c(1,1))
Try to analyze survival data
boxplot(PropSurvived~ParentTrt*JarTrt, data=LarvByJarDat)
boxplot(PropSurvived~CrossID*JarTrt, data=LarvByJarDat)
#I don't know what we want to do about the larvae with proportion survived greater than one. For now, I will remove them.
LarvByJarDatSurv<- subset(LarvByJarDat, PropSurvived<=1)
boxplot(PropSurvived~ParentTrt*JarTrt, data=LarvByJarDatSurv)
#create linear mixed model
surv1<- lmer(PropSurvived~WCa_pHDIC*JarOmegaCalcite+ (1|FemaleID)+(1|MaleID)+(1|CrossID)+(1|JarSeatable)+(1|TrtTankID)+(1|AccTankID), data=LarvByJarDatSurv)
survstep<- step(surv1)
print(survstep)
##
## Random effects:
## Chi.sq Chi.DF elim.num p.value
## AccTankID 0.01 1 1 0.9259
## FemaleID 0.07 1 2 0.7923
## TrtTankID 1.20 1 3 0.2734
## MaleID 1.13 1 4 0.2875
## CrossID 249.06 1 kept <1e-07
## JarSeatable 5.64 1 kept 0.0175
##
## Fixed effects:
## Sum Sq Mean Sq NumDF DenDF F.value elim.num
## WCa_pHDIC:JarOmegaCalcite 0.0001 0.0001 1 190.04 0.0103 1
## WCa_pHDIC 0.0069 0.0069 1 38.00 1.2401 2
## JarOmegaCalcite 0.2660 0.2660 1 191.19 48.0394 kept
## Pr(>F)
## WCa_pHDIC:JarOmegaCalcite 0.9192
## WCa_pHDIC 0.2724
## JarOmegaCalcite <1e-07
##
## Least squares means:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Differences of LSMEANS:
## Estimate Standard Error DF t-value Lower CI Upper CI p-value
##
## Final model:
## lme4::lmer(formula = PropSurvived ~ JarOmegaCalcite + (1 | CrossID) +
## (1 | JarSeatable), data = LarvByJarDatSurv)
#final model chosen by step function: PropSurvived~JarOmegaCalcite+ (1|CrossID)+(1|JarSeatable)
survfin<- lmer(PropSurvived~JarOmegaCalcite+(1|CrossID)+(1|JarSeatable), data=LarvByJarDatSurv)
plot(survfin) #this looks fine to me
qqnorm(resid(survfin))
qqline(resid(survfin)) #I think this is fine
acf(survfin)
## Error in complete.cases(object): invalid 'type' (S4) of argument
summary(survfin)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula:
## PropSurvived ~ JarOmegaCalcite + (1 | CrossID) + (1 | JarSeatable)
## Data: LarvByJarDatSurv
##
## REML criterion at convergence: -406.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7631 -0.4894 0.0348 0.5565 2.3172
##
## Random effects:
## Groups Name Variance Std.Dev.
## CrossID (Intercept) 0.026348 0.16232
## JarSeatable (Intercept) 0.000337 0.01836
## Residual 0.005536 0.07441
## Number of obs: 236, groups: CrossID, 41; JarSeatable, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.54840 0.02897 33.45000 18.932 < 2e-16 ***
## JarOmegaCalcite 0.07939 0.01145 191.19000 6.931 6.23e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## JarOmegClct -0.263
#Jar treatment significantly affects survival
ggplot(LarvByJarDatSurv,
aes(fill = WCa_pHDIC, y = PropSurvived, x = JarOmegaCalcite)) +
geom_point(aes(colour=WCa_pHDIC)) +
ggtitle("Larvae survival by larvae treatment")+
ylab("Proportion of Larvae that Survived")+
theme_classic()
meansurv<- ddply(LarvByJarDatSurv, .(JarTrt, CrossID), numcolwise(mean, na.rm=T))
sesurv<- ddply(LarvByJarDatSurv, .(JarTrt, CrossID), numcolwise(se, na.rm=T))
#make a shorter name for the CrossIDs
meansurv$Cross<- substr(meansurv$CrossID, 1, 9)
sesurv$Cross<- substr(sesurv$CrossID, 1, 9)
ggplot(data = meansurv, aes(x = Cross, y = PropSurvived, fill=JarTrt)) +
geom_bar(stat="identity", aes(fill=JarTrt), position="dodge")+
labs(x = "Cross ID", y = "Proportion of Larvae that Survived") +
scale_fill_manual(name = "Larvae Treatment", labels = c("Control", "OA Exposed"), values = c("skyblue2", "red2")) +
geom_errorbar(aes(ymin= meansurv$PropSurvived-sesurv$PropSurvived, ymax=meansurv$PropSurvived+sesurv$PropSurvived),width=0.2, position=position_dodge(.9))+
theme(axis.text.x=element_text(angle=90))
## Warning: Removed 3 rows containing missing values (geom_errorbar).
#EF03_EM05 only has control because the exposed all had survival >1.
#now just plot the JarTrt
meanjarsurv<- ddply(LarvByJarDatSurv, .(JarTrt), numcolwise(mean, na.rm=T))
sejarsurv<- ddply(LarvByJarDatSurv, .(JarTrt), numcolwise(se, na.rm=T))
ggplot(data = meanjarsurv, aes(x = JarTrt, y = PropSurvived)) +
geom_bar(stat="identity", aes(fill="gray45"), position="dodge")+
labs(x = "Jar Treatment", y = "Proportion of Larvae that Survived") +
scale_fill_manual(values = "gray45") +
geom_errorbar(aes(ymin= meanjarsurv$PropSurvived-sejarsurv$PropSurvived, ymax=meanjarsurv$PropSurvived+sejarsurv$PropSurvived),width=0.2, position=position_dodge(.9))+
theme(legend.position = "none", panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"))
#try to use the cross difference code to look at the difference in survival.
survdiff <- data.frame(tapply(LarvByJarDat$PropSurvived, list(LarvByJarDat$CrossID, LarvByJarDat$JarTrt), mean)) %>%
rownames_to_column("CrossID") %>%
mutate(CrossTemp = CrossID) %>%
mutate(ParentTrt = ifelse(startsWith(CrossID, "E"), "Exposed", "Control")) %>%
separate(CrossTemp, c("FemaleID", "MaleID", "BlockID", "Fourth"), sep="_") %>% select(-Fourth) %>%
mutate(Diff = Exposed - Control)
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1 rows [1].
survdiff$CrossID<- as.factor(survdiff$CrossID)
survdiff$FemaleID<-as.factor(survdiff$FemaleID)
survdiff$MaleID<-as.factor(survdiff$MaleID)
#look at cross differences by Male and Female ID
ggplot(data = survdiff, aes(x = FemaleID, y = Diff)) +
geom_point(aes(colour=MaleID)) +
labs(x = "Female ID", y = "Change in larvae survival per family\nfrom control to OA conditions") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"), panel.border=element_rect(colour="black", fill=NA))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
## Warning: Removed 70 rows containing missing values (geom_point).
#get the means of the differences to visualize overall differences
meansurvdiff<- ddply(survdiff, .(ParentTrt), numcolwise(mean, na.rm=T))
sesurvdiff<- ddply(survdiff, .(ParentTrt), numcolwise(se, na.rm=T))
ggplot(data = meansurvdiff, aes(x = ParentTrt, y = Diff)) +
geom_bar(stat="identity", aes(fill="gray45"), position="dodge") +
labs(x = "Parental Treatment", y = "Change in larvae survival per family\nfrom control to OA conditions") +
scale_fill_manual(values = "gray45") +
geom_errorbar(aes(ymin= meansurvdiff$Diff-sesurvdiff$Diff, ymax=meansurvdiff$Diff+sesurvdiff$Diff),width=0.2, position=position_dodge(.9))+
theme(legend.position = "none", panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "black"))
diffsurvttest<- aov(Diff~ParentTrt, data=survdiff)
anova(diffsurvttest) #no significant effect of parental treatment
## Analysis of Variance Table
##
## Response: Diff
## Df Sum Sq Mean Sq F value Pr(>F)
## ParentTrt 1 0.01155 0.011555 0.7309 0.3974
## Residuals 42 0.66397 0.015809
Explore cilia extrusion.
#Plot cilia extrusion by size to see if there is a relationship (see Waldbusser et al 2015)
#to do this Elise looked at a subset of Larvae for each jar treatment.
par(mfcol=c(1,1))
testcil<- read.csv("../data/LarvMorphTest.csv")
testcil$CilExtruded<- as.factor(testcil$CilExtruded)
testcilsub<- subset(testcil, CilExtruded !="")
testcilsub$Flag<- as.factor(testcilsub$Flag)
testcilsub<- subset(testcilsub, Flag !="TRUE")
#take a look at the data
ggplot(testcilsub,
aes(fill = JarTrt, y = MaxFeretDiamum, x = CilExtruded)) +
geom_point(aes( shape=JarTrt, colour=JarTrt)) +
ggtitle("Larvae cilia extrusion by size")+
ylab("Larvae Diameter (um)")+
facet_grid(~JarTrt)+
theme_classic()
#test for a relationship between size and cilia extruded within each jar treatment type.
exp<- subset(testcilsub, JarTrt=="Exposed")
con<- subset(testcilsub, JarTrt=="Control")
expcil1<-glm(CilExtruded~MaxFeretDiamum, data=exp, family=binomial)
summary(expcil1) #no significant relationship between size and cilia extrusion
##
## Call:
## glm(formula = CilExtruded ~ MaxFeretDiamum, family = binomial,
## data = exp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7172 0.7201 0.7444 0.7527 0.7725
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.566735 3.046585 0.186 0.852
## MaxFeretDiamum 0.008725 0.047015 0.186 0.853
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 182.22 on 163 degrees of freedom
## Residual deviance: 182.18 on 162 degrees of freedom
## AIC: 186.18
##
## Number of Fisher Scoring iterations: 4
boxplot(MaxFeretDiamum~CilExtruded, data=exp)
#now test the control jars for relationship between size and cilia extrusion.
concil1<-glm(CilExtruded~MaxFeretDiamum, data=con, family=binomial)
summary(concil1) #looks like there is a significant relationship between size and cilia extrusion
##
## Call:
## glm(formula = CilExtruded ~ MaxFeretDiamum, family = binomial,
## data = con)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9324 -0.3705 -0.3098 -0.2538 2.8520
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 14.8599 7.5190 1.976 0.0481 *
## MaxFeretDiamum -0.2376 0.1026 -2.316 0.0206 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 82.805 on 179 degrees of freedom
## Residual deviance: 77.639 on 178 degrees of freedom
## AIC: 81.639
##
## Number of Fisher Scoring iterations: 6
boxplot(MaxFeretDiamum~CilExtruded, data=con)
Export Figures
plots.dir.path<- list.files(tempdir(), pattern="rs-graphics", full.names= TRUE)
plots.png.paths<- list.files(plots.dir.path, pattern=".png", full.names=TRUE)
file.copy(from=plots.png.paths, to="../results")
## logical(0)
From data exploration:
lm1<- lm(LarvaeDiamum~ParentTrt*JarTrt+EggDiamum, data=LarvaeDat)
par(mfcol=c(2,2))
plot(lm1) #does not appear to meet assumptions of normality
par(mfcol=c(1,1))
#plot the residuals of lm1 vs. Female and Male ID
plot(resid(lm1)~LarvaeDat$FemaleID)
abline(0,0)
plot(resid(lm1)~LarvaeDat$MaleID)
abline(0,0)
#look at autocorrelation of lm1; elise is still struggling with this
#add female ID to the model as a random effect, but take out EggDiamum then plot the residuals against MaleID
lm1b<- lmer(LarvaeDiamum~ParentTrt*JarTrt+ (1|FemaleID), data=LarvaeDat)
summary(lm1b)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ ParentTrt * JarTrt + (1 | FemaleID)
## Data: LarvaeDat
##
## REML criterion at convergence: 21158.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.4403 -0.5024 0.0615 0.6160 4.0917
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 6.925 2.632
## Residual 15.858 3.982
## Number of obs: 3758, groups: FemaleID, 28
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 76.5288 0.7521 28.0000 101.748
## ParentTrt2800 -0.2925 1.0324 28.0000 -0.283
## JarTrtExposed -10.9849 0.2096 3751.0000 -52.420
## ParentTrt2800:JarTrtExposed 1.1578 0.3066 3753.0000 3.776
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ParentTrt2800 0.779016
## JarTrtExposed < 2e-16 ***
## ParentTrt2800:JarTrtExposed 0.000162 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PrT2800 JrTrtE
## PrntTrt2800 -0.729
## JarTrtExpsd -0.193 0.141
## PrT2800:JTE 0.132 -0.217 -0.683
plot(lm1b) #this looks okay, seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(lm1b))
qqline(resid(lm1b)) #doesn't look like it meets the assumption of normality
#plot the residuals of lm1b vs. Male ID
plot(resid(lm1b)~LarvaeDat$MaleID)
abline(0,0)
#I think we probably want to include the male ID in the model
lm1c<- lmer(LarvaeDiamum~ParentTrt*JarTrt+ (1|FemaleID)+ (1|MaleID), data=LarvaeDat)
summary(lm1c)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: LarvaeDiamum ~ ParentTrt * JarTrt + (1 | FemaleID) + (1 | MaleID)
## Data: LarvaeDat
##
## REML criterion at convergence: 20435.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.1926 -0.5092 0.0507 0.5987 4.5559
##
## Random effects:
## Groups Name Variance Std.Dev.
## FemaleID (Intercept) 8.920 2.987
## MaleID (Intercept) 8.831 2.972
## Residual 12.751 3.571
## Number of obs: 3758, groups: FemaleID, 28; MaleID, 24
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 76.7310 1.2200 41.0000 62.893
## ParentTrt2800 -0.6219 1.7100 40.0000 -0.364
## JarTrtExposed -11.0094 0.1883 3714.0000 -58.475
## ParentTrt2800:JarTrtExposed 1.1690 0.2755 3716.0000 4.243
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ParentTrt2800 0.718
## JarTrtExposed < 2e-16 ***
## ParentTrt2800:JarTrtExposed 2.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PrT2800 JrTrtE
## PrntTrt2800 -0.713
## JarTrtExpsd -0.116 0.083
## PrT2800:JTE 0.079 -0.120 -0.683
plot(lm1c) #this looks okay, seems to meet assumption of linearity and homoscedasticity
qqnorm(resid(lm1c))
qqline(resid(lm1c)) #doesn't look like it meets the assumption of normality
#plot the residuals of lm1c vs. TimeFiltered
plot(resid(lm1c)~LarvaeDat$TimeFilter)
abline(0,0)
Move on to next linear model to at least get the code down
lm2<-lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvaeDat)
#check assumptions for lm2
#Assumption 1: linearity:
plot(resid(lm2)~LarvaeDat$ParentTrt)
plot(resid(lm2)~LarvaeDat$JarTrt)
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(lm2)# seems to meet assumption of homoscedasticity but check with Levene's test:
leveneTest(resid(lm2)~LarvaeDat$ParentTrt*LarvaeDat$JarTrt, center=mean) #p<0.05 so does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 3 48.486 < 2.2e-16 ***
## 3754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm2))
qqline(resid(lm2)) #does not meet assumption of normality
#will need to try to transform the data. I have been using the Larvae dataframe, not the LarbyJar dataframe. Is that the right call?
lm2t<-lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvaeDat)
#check assumptions for lm2
#Assumption 1: linearity:
plot(resid(lm2t)~LarvaeDat$ParentTrt)
plot(resid(lm2t)~LarvaeDat$JarTrt)
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(lm2t)# seems to meet assumption of homoscedasticity but check with Levene's test:
leveneTest(resid(lm2t)~LarvaeDat$ParentTrt*LarvaeDat$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 3 76.191 < 2.2e-16 ***
## 3754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm2t))
qqline(resid(lm2t)) #still does not meet assumption of normality
#currently does not meet assumptions, but still write the code for looking at residuals for lm2
plot(resid(lm2t)~LarvaeDat$CrossID) # I don't see anything that would suggest we should account for this.
abline(0,0)
plot(resid(lm2t)~LarvaeDat$JarID) #some of the jars have unusually high or low resids, so will have to account for this.
abline(0,0)
plot(resid(lm2t)~LarvaeDat$TimeFilter) #we may want to account for this
abline(0,0)
plot(resid(lm2t)~LarvaeDat$BlockID.x) #does not seem to need to be accounted for
## Error in (function (formula, data = NULL, subset = NULL, na.action = na.fail, : invalid type (NULL) for variable 'LarvaeDat$BlockID.x'
#make a new model that accounts for JarID and TimeFilter then check assumptions again
lm3 <- lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=LarvaeDat)
#check assumptions for lm3
#Assumption 1: linearity:
plot(resid(lm3)~LarvaeDat$ParentTrt)
plot(resid(lm3)~LarvaeDat$JarTrt)
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(lm3)# seems to meet assumption of homoscedasticity but check with Levene's test:
leveneTest(resid(lm3)~LarvaeDat$ParentTrt*LarvaeDat$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 3 43.401 < 2.2e-16 ***
## 3754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm3))
qqline(resid(lm3)) #still does not meet assumption of normality
#try transforming the data again
lm3t <- lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=Larvae)
#check assumptions for lm3
#Assumption 1: linearity:
plot(resid(lm3t)~Larvae$ParentTrt)
plot(resid(lm3t)~Larvae$JarTrt)
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(lm3t)# seems to meet assumption of homoscedasticity but check with Levene's test:
leveneTest(resid(lm3t)~Larvae$ParentTrt*Larvae$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 3 73.343 < 2.2e-16 ***
## 3754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(lm3t))
qqline(resid(lm3t)) #still doesn't meet assumption of normality
Retry the above models but use the LarvByJar dataset
ggplot(LarvByJar,
aes(fill = ParentTrt, y = LarvaeDiamum, x = EggDiamum)) +
geom_point(aes( shape=JarTrt, colour=ParentTrt)) +
ggtitle("Larvae diameter by egg diameter")+
ylab("Larvae Diameter (um)")+
facet_grid(~JarTrt)+
theme_classic()
## Warning: Removed 115 rows containing missing values (geom_point).
#start with a simple linear model
jar1<- lm(LarvaeDiamum~ParentTrt*JarTrt+EggDiamum, data=LarvByJar, na.action="na.exclude")
par(mfcol=c(2,2))
plot(jar1) #does not appear to meet assumptions of normality, try transforming
par(mfcol=c(1,1))
#plot the residuals of lm1 vs. Female and Male ID
plot(resid(jar1)~LarvByJar$FemaleID)
abline(0,0)
plot(resid(jar1)~LarvByJar$MaleID)
abline(0,0)
#we will need to account for female and male ID
jar2<-lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvByJar, na.action = "na.exclude")
#check assumptions for lm2
#Assumption 1: linearity:
plot(resid(jar2)~LarvByJar$ParentTrt)
plot(resid(jar2)~LarvByJar$JarTrt)
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(jar2)# seems to meet assumption of homoscedasticity but check with Levene's test:
leveneTest(resid(jar2)~LarvByJar$ParentTrt*LarvByJar$JarTrt, center=mean) #p<0.05 so does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 3 4.5575 0.003765 **
## 373
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(jar2))
qqline(resid(jar2)) #does not meet assumption of normality
#will need to try to transform the data. I have been using the Larvae dataframe, not the LarbyJar dataframe. Is that the right call?
jar2t<-lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=LarvByJar, na.action= "na.exclude")
#check assumptions for lm2
#Assumption 1: linearity:
plot(resid(jar2t)~LarvByJar$ParentTrt)
plot(resid(jar2t)~LarvByJar$JarTrt)
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(jar2t)# seems to meet assumption of homoscedasticity but check with Levene's test:
leveneTest(resid(jar2t)~LarvByJar$ParentTrt*LarvByJar$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 3 6.2957 0.0003553 ***
## 373
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assumption 3: residuals are normally distributed
qqnorm(resid(jar2t))
qqline(resid(jar2t)) #still does not meet assumption of normality
#currently does not meet assumptions, but still write the code for looking at residuals for jar2
plot(resid(jar2t)~LarvByJar$CrossID) # It seems that we will want to account for this.
abline(0,0)
plot(resid(jar2t)~LarvByJar$TimeFilter) #we will want to account for this
abline(0,0)
plot(resid(jar2t)~LarvByJar$BlockID.x)#I think we will need to account for this
## Error in (function (formula, data = NULL, subset = NULL, na.action = na.fail, : invalid type (NULL) for variable 'LarvByJar$BlockID.x'
#make a new model that accounts for CrossID, TimeFilter, and block then check assumptions again
jar3 <- lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|CrossID) + (1|TimeFilter) +(1|BlockID.x), data=LarvByJar, na.action="na.exclude")
## Error in eval(predvars, data, env): object 'BlockID.x' not found
#check assumptions for lm3
#Assumption 1: linearity:
plot(resid(jar3)~LarvByJar$ParentTrt)
## Error in resid(jar3): object 'jar3' not found
plot(resid(jar3)~LarvByJar$JarTrt)
## Error in resid(jar3): object 'jar3' not found
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(jar3)# seems to meet assumption of homoscedasticity but check with Levene's test:
## Error in plot(jar3): object 'jar3' not found
leveneTest(resid(jar3)~LarvByJar$ParentTrt*LarvByJar$JarTrt, center=mean) #p<0.05 does not meet assumption of homoscedasticity
## Error in resid(jar3): object 'jar3' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(jar3))
## Error in resid(jar3): object 'jar3' not found
qqline(resid(jar3)) #does not meet assumption of normality
## Error in resid(jar3): object 'jar3' not found
Subset the data to only have Block 1 to see if that might resolve some of the abnormality problems
B1<- subset(Larvae, BlockID.x =="B1")
## Error in eval(e, x, parent.frame()): object 'BlockID.x' not found
ggplot(B1,
aes(fill = ParentTrt, y = LarvaeDiamum, x = EggDiamum)) +
geom_point(aes( shape=JarTrt, colour=ParentTrt)) +
ggtitle("Larvae diameter by egg diameter")+
ylab("Larvae Diameter (um)")+
facet_grid(~JarTrt)+
theme_classic()
## Error in ggplot(B1, aes(fill = ParentTrt, y = LarvaeDiamum, x = EggDiamum)): object 'B1' not found
#start with a simple linear model
larv1<- lm(LarvaeDiamum~ParentTrt*JarTrt+EggDiamum, data=B1)
## Error in is.data.frame(data): object 'B1' not found
par(mfcol=c(2,2))
plot(larv1) #does not seem to be normally distributed
## Error in plot(larv1): object 'larv1' not found
par(mfcol=c(1,1))
#plot the residuals of lm1 vs. Female and Male ID
plot(resid(larv1)~B1$FemaleID)
## Error in resid(larv1): object 'larv1' not found
plot(resid(larv1)~B1$MaleID)
## Error in resid(larv1): object 'larv1' not found
#does not seem to be a problem
Move on to next linear model to at least get the code down
B1_2<-lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for B1_2
#Assumption 1: linearity:
plot(resid(B1_2)~B1$ParentTrt)
## Error in resid(B1_2): object 'B1_2' not found
plot(resid(B1_2)~B1$JarTrt)
## Error in resid(B1_2): object 'B1_2' not found
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(B1_2)# seems to meet assumption of homoscedasticity but check with Levene's test:
## Error in plot(B1_2): object 'B1_2' not found
leveneTest(resid(B1_2)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 so does not meet assumption of homoscedasticity
## Error in resid(B1_2): object 'B1_2' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_2))
## Error in resid(B1_2): object 'B1_2' not found
qqline(resid(B1_2)) #does not meet assumption of normality
## Error in resid(B1_2): object 'B1_2' not found
#will need to try to transform the data.
B1_2t<-lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for B1_2t
#Assumption 1: linearity:
plot(resid(B1_2t)~B1$ParentTrt)
## Error in resid(B1_2t): object 'B1_2t' not found
plot(resid(B1_2t)~B1$JarTrt)
## Error in resid(B1_2t): object 'B1_2t' not found
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(B1_2t)# seems to meet assumption of homoscedasticity but check with Levene's test:
## Error in plot(B1_2t): object 'B1_2t' not found
leveneTest(resid(B1_2t)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Error in resid(B1_2t): object 'B1_2t' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_2t))
## Error in resid(B1_2t): object 'B1_2t' not found
qqline(resid(B1_2t)) #looks better maybe
## Error in resid(B1_2t): object 'B1_2t' not found
#code for looking at residuals for B1_2t
plot(resid(B1_2t)~B1$CrossID) # I don't see anything that would suggest we should account for this.
## Error in resid(B1_2t): object 'B1_2t' not found
abline(0,0)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
plot(resid(B1_2t)~B1$JarID)
## Error in resid(B1_2t): object 'B1_2t' not found
abline(0,0)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
plot(resid(B1_2t)~B1$TimeFilter) #we may want to account for this
## Error in resid(B1_2t): object 'B1_2t' not found
abline(0,0)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
plot(resid(B1_2t)~B1$BlockID.x) #does not seem to need to be accounted for
## Error in resid(B1_2t): object 'B1_2t' not found
#make a new model that accounts for JarID and TimeFilter then check assumptions again
B1_3 <- lmer(LarvaeDiamum~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for lm3
#Assumption 1: linearity:
plot(resid(B1_3)~B1$ParentTrt)
## Error in resid(B1_3): object 'B1_3' not found
plot(resid(B1_3)~B1$JarTrt)
## Error in resid(B1_3): object 'B1_3' not found
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(B1_3)# seems to meet assumption of homoscedasticity but check with Levene's test:
## Error in plot(B1_3): object 'B1_3' not found
leveneTest(resid(B1_3)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 does not meet assumption of homoscedasticity
## Error in resid(B1_3): object 'B1_3' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_3))
## Error in resid(B1_3): object 'B1_3' not found
qqline(resid(B1_3)) #still does not meet assumption of normality
## Error in resid(B1_3): object 'B1_3' not found
#try transforming the data again
B1_3t <- lmer(log(LarvaeDiamum)~ParentTrt*JarTrt + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for lm3
#Assumption 1: linearity:
plot(resid(B1_3t)~B1$ParentTrt)
## Error in resid(B1_3t): object 'B1_3t' not found
plot(resid(B1_3t)~B1$JarTrt)
## Error in resid(B1_3t): object 'B1_3t' not found
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(B1_3t)# seems to meet assumption of homoscedasticity but check with Levene's test:
## Error in plot(B1_3t): object 'B1_3t' not found
leveneTest(resid(B1_3t)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Error in resid(B1_3t): object 'B1_3t' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1_3t))
## Error in resid(B1_3t): object 'B1_3t' not found
qqline(resid(B1_3t)) #still doesn't meet assumption of normality
## Error in resid(B1_3t): object 'B1_3t' not found
I want to try to do the stats with the pH values from the jars. I don’t have the pH values for the adult tank water chem yet
B1tlm <- lmer(log(LarvaeDiamum)~ParentTrt*JarpH + (1|FemaleID)+(1|MaleID) + (1|JarID) + (1|TimeFilter), data=B1)
## Error: 'data' not found, and some variables missing from formula environment
#check assumptions for lm3
#Assumption 1: linearity:
plot(resid(B1tlm)~B1$ParentTrt)
## Error in resid(B1tlm): object 'B1tlm' not found
plot(resid(B1tlm)~B1$JarTrt)
## Error in resid(B1tlm): object 'B1tlm' not found
#seem to meet assumption of linearity
#Assumption 2: Homoscedasticity
plot(B1tlm)# seems to meet assumption of homoscedasticity but check with Levene's test:
## Error in plot(B1tlm): object 'B1tlm' not found
leveneTest(resid(B1tlm)~B1$ParentTrt*B1$JarTrt, center=mean) #p<0.05 still does not meet assumption of homoscedasticity
## Error in resid(B1tlm): object 'B1tlm' not found
#Assumption 3: residuals are normally distributed
qqnorm(resid(B1tlm))
## Error in resid(B1tlm): object 'B1tlm' not found
qqline(resid(B1tlm)) #doesn't meet assumption of normality
## Error in resid(B1tlm): object 'B1tlm' not found
BELOW CODE HASN’T BEEN CHECKED IN MARCH
Subset data by larvae treatment and parent treatment
exppar <- LarvaeMorph %>% filter(CrossType == “Exposed”) conpar <- LarvaeMorph %>% filter(CrossType == “Control”) #subset the data into exposed and control larvae treatments explar <- LarvaeMorph %>% filter(JarTrt == “Exposed”) conlar <- LarvaeMorph %>% filter(JarTrt == “Control”)
expparcalc <- LarvaeCalc %>% filter(CrossType == “Exposed”) conparcalc <- LarvaeCalc %>% filter(CrossType == “Control”) #subset the data into exposed and control larvae treatments explarcalc <- LarvaeCalc %>% filter(JarTrt == “Exposed”) conlarcalc <- LarvaeCalc %>% filter(JarTrt == “Control”)
Start by looking at the relationship between egg size and larvae size
ggplot(data=LarvaeCalc, aes(x=EggAreauM2, y=SurfaceAreaum2, fill=JarTrt))+ geom_point(aes(shape=CrossType, colour=JarTrt))+ theme_classic()
egglarvsize<- lm(SurfaceAreaum2~EggAreauM2JarTrtCrossType, data=LarvaeCalc) plot(egglarvsize) #looks like it meets assumptions summary(egglarvsize) #egg area is not significant, but JarTrt is p=0.0389
Look at data that might be count or weight outliers
plot(F1LarvaeCount~PercentFert, data=LarvaeCalc, col=JarTrt) ggplot(data=LarvaeCalc, aes(x=mLJarActual, y=F1LarvaeCount))+ geom_point(aes(shape=JarTrt, colour=PercentFert))
plot(F1TotalLarvaeWtg~F1LarvaeCount, data=LarvaeCalc, col=JarTrt) plot(F1WtPerLarvae~F1LarvaeCount, data=LarvaeCalc, col=JarTrt) text(F1WtPerLarvae~F1LarvaeCount, labels=JarID, data=LarvaeCalc, font=2, cex=.9)
The plots below can be used for all morphology data: length, perimeter, surface area
ggplot(data = conpar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)
ggplot(data = exppar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length exposed parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)
ggplot(data = conpar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)
ggplot(data = exppar, aes(x = JarSeatable, y = MaxFeretDiamum)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “Replicate”, y = “Shell Length (um)”, title = “Larvae length exposed parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)
Make boxplots by parental cross, plotting both larvae treatments over one another. Can be used for all morphology parameters: surface area, length,
ggplot(data = LarvaeMorph, aes(x = CrossID, y = MaxFeretDiamum)) + geom_boxplot(aes(colour=JarTrt)) + labs(x = “Parental Cross”, y = “Shell Length (um)”, title = “Larvae lengths”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”))+ scale_fill_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_y_continuous(breaks = seq(floor(min(LarvaeMorph\(MaxFeretDiamum)), ceiling(max(LarvaeMorph\)MaxFeretDiamum)), 5)) +
ggplot(data = colar, aes(x = CrossID, y = MaxFeretDiamum, fill = ParTrt)) + geom_boxplot() + labs(x = “Parental Cross”, y = “Shell Length (um)”, title = “Control Larvae”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_fill_manual(name = “Parental Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_y_continuous(breaks = seq(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)), 5), limits = c(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)))) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))
ggplot(data = exlar, aes(x = CrossID, y = MaxFeretDiamum, fill = ParTrt)) + geom_boxplot() + labs(x = “Parental Cross”, y = “Shell Length (um)”, title = “OA Larvae”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_fill_manual(name = “Parental Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_y_continuous(breaks = seq(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)), 5), limits = c(floor(min(larvmorph\(MaxFeretDiamum)), ceiling(max(larvmorph\)MaxFeretDiamum)))) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”), panel.border=element_rect(color=“black”,fill=NA))+ facet_grid(.~CrossType, scale=“free_x”)
Plot calcification data
ggplot(data = conparcalc, aes(x = JarID, y = F1WtPerLarvae)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “F1 larvae count”, title = “Larvae count control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID, scale=“free_x”)
ggplot(data = expparcalc, aes(x = JarSeatable, y = F1WtPerLarvae)) + geom_point(aes(shape=JarTrt, colour=JarTrt))+ labs(x = “Replicate”, y = “F1 weight per larvae”, title = “Larvae weight exposed parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Larvae Treatment”, labels=c(“Control”,“OA”), values =c(19,17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”)) + facet_grid(.~CrossID)
ggplot(data = conparcalc, aes(x = CrossID, y = F1WtPerLarvae)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “CrossID”, y = “F1 weight per larvae”, title = “Larvae weight control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”))
ggplot(data = expparcalc, aes(x = CrossID, y = F1WtPerLarvae)) + geom_boxplot(aes(colour=JarTrt))+ labs(x = “CrossID”, y = “F1 weight per larvae”, title = “Larvae weight control parents”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_rect(color=“black”, fill=NA), panel.background = element_rect(colour=“white”), axis.line = element_line(colour = “black”))
Make boxplots by parental cross, plotting both larvae treatments over one another.For calcification data; can also be used for cilia counts
ggplot(data = LarvaeCalc, aes(x = CrossID, y = F1WtPerLarvae)) + geom_boxplot(aes(colour=JarTrt)) + labs(x = “Parental Cross”, y = “F1 Weight per larvae (g)”, title = “Larvae weights”) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + scale_color_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”))+ scale_fill_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”), panel.border=element_rect(color=“black”,fill=NA))+ facet_grid(.~CrossType, scale=“free_x”)
Plots of difference between parental treatments for morphology
crossdiff <- data.frame(tapply(LarvaeMorph\(MaxFeretDiamum, list(LarvaeMorph\)CrossID, LarvaeMorph$JarTrt), median)) %>% rownames_to_column(“CrossID”) %>% mutate(CrossTemp = CrossID) %>% mutate(ParentTrt = ifelse(startsWith(CrossID, “E”), “Exposed”, “Control”)) %>% separate(CrossTemp, c(“FemaleID”, “MaleID”, “BlockID”, “Fourth”), sep=“_“) %>% select(-Fourth) %>% mutate(Diff = Exposed - Control)
crossdiff\(CrossID<- as.factor(crossdiff\)CrossID) crossdiff\(FemaleID<-as.factor(crossdiff\)FemaleID) crossdiff\(MaleID<-as.factor(crossdiff\)MaleID)
ggplot(data = crossdiff, aes(x = MaleID, y = Diff)) + geom_point(aes(shape=ParentTrt, colour=ParentTrt)) + labs(x = “Male ID”, y = “Change in median shell length (um) per familycontrol to OA conditions”) + scale_colour_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(19, 17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”), panel.border=element_rect(colour=“black”, fill=NA))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+ facet_grid(.~FemaleID, scale=“free_x”)
ggplot(data = crossdiff, aes(x = ParentTrt, y = Diff, fill = ParentTrt)) + geom_boxplot() + labs(x = “Parental Treatment”, y = “Change in median shell length (um) per familycontrol to OA conditions”) + scale_x_discrete(breaks = c(“Control”, “Exposed”), labels = c(“Control”, “OA”)) + scale_fill_manual(values = c(“#00BFC4”, “#F8766D”)) + theme(legend.position = “none”, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))
diffmean<- aggregate(Diff~ParentTrt, data=crossdiff, FUN=mean) diffse<- aggregate(Diff~ParentTrt, data=crossdiff, FUN=se)
ggplot(data = diffmean, aes(x = ParentTrt, y = Diff, fill=“grey67”)) + geom_bar(stat=“identity”, position=“dodge”,aes(fill=“grey67”))+ scale_fill_manual(values=“grey67”)+ labs(x = “Parent Treatment”, y = “Change in Length”) + geom_errorbar(aes(ymin= diffmean\(Diff-diffse\)Diff, ymax=diffmean\(Diff+diffse\)Diff),width=0.2, position=position_dodge(.9))+ theme_classic()
Look at differences for calcification
crossdiffcalc <- data.frame(tapply(LarvaeCalc\(F1WtPerLarvae, list(LarvaeCalc\)CrossID, LarvaeCalc$JarTrt), median)) %>% rownames_to_column(“CrossID”) %>% mutate(CrossTemp = CrossID) %>% mutate(ParentTrt = ifelse(startsWith(CrossID, “E”), “Exposed”, “Control”)) %>% separate(CrossTemp, c(“FemaleID”, “MaleID”, “BlockID”, “Fourth”), sep=“_“) %>% select(-Fourth) %>% mutate(Diff = Exposed - Control)
crossdiffcalc\(CrossID<- as.factor(crossdiffcalc\)CrossID) crossdiffcalc\(FemaleID<-as.factor(crossdiffcalc\)FemaleID) crossdiffcalc\(MaleID<-as.factor(crossdiffcalc\)MaleID)
ggplot(data = crossdiffcalc, aes(x = FemaleID, y = Diff)) + geom_point(aes(shape=ParentTrt, colour=ParentTrt)) + labs(x = “Female ID”, y = “Change in median weight per familycontrol to OA conditions”) + scale_colour_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(“#00BFC4”, “#F8766D”)) + scale_shape_manual(name=“Parent treatment”, labels= c(“control”, “OA”), values = c(19, 17))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+ facet_grid(.~MaleID, scale=“free_x”)
ggplot(data = crossdiffcalc, aes(x = ParentTrt, y = Diff, fill = ParentTrt)) + geom_boxplot() + labs(x = “Parental Treatment”, y = “Change in median larvae weight per familycontrol to OA conditions”) + scale_x_discrete(breaks = c(“Control”, “Exposed”), labels = c(“Control”, “OA”)) + scale_fill_manual(values = c(“#00BFC4”, “#F8766D”)) + theme(legend.position = “none”, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = “black”))
Make plot that looks at calcification and cilia by larvae and parent treatment
par(mar = c(5, 4, 0.5, 0.5), bg = “transparent”) boxplot(LarvaeMorph\(PercentCilia ~ LarvaeMorph\)JarTrt * LarvaeMorph\(CrossType, xlim = c(0, 5), xlab = "Larvae Treatment", ylab = "Percent Cilia", names = c("Control", "OA", "Control", "OA"), col = c("powderblue", "red")) rect(xleft = -0.2, ybottom = 0, xright = 2.5, ytop = 1, border = FALSE, col = rgb(0, 0, 0.5, alpha = 0.1)) rect(xleft = 2.5, ybottom = 0, xright = 5.2, ytop = 1, border = FALSE, col = rgb(0.5, 0, 0, alpha = 0.1)) text(x = 1.25 , y = 0.000007, "Control Parents") text(x = 3.75, y = 0.000007, "OA Parents") boxplot(LarvaeMorph\)PercentCilia ~ LarvaeMorph\(JarTrt * LarvaeMorph\)CrossType, xlim = c(0, 5), xlab = “Larvae Treatment”, ylab = “”, names = c(“Control”, “OA”, “Control”, “OA”), col = c(“powderblue”, “red”), add = TRUE)
```
meancil<- aggregate(PercentCilia~CrossTypeJarTrt, data=LarvaeCalc, FUN= mean) secil<- aggregate(PercentCilia~CrossTypeJarTrt, data=LarvaeCalc, FUN= se) #plot the data as bargraphs with ggplot ggplot(data = meancil, aes(x = CrossType, y = PercentCilia, fill=JarTrt)) + geom_bar(stat=“identity”, aes(fill=JarTrt), position=“dodge”)+ labs(x = “Parent Treatment”, y = “Proportion Cilia Extruded”) + scale_fill_manual(name = “Larvae Treatment”, labels = c(“Control”, “OA”), values = c(“blue4”, “red2”)) + geom_errorbar(aes(ymin= meancil\(PercentCilia-secil\)PercentCilia, ymax=meancil\(PercentCilia+secil\)PercentCilia),width=0.2, position=position_dodge(.9))+ theme_classic()
Histdata<-unite(LarvaeCalc, Group, c(CrossType, JarTrt), sep=“_“) ggplot(Histdata, aes(x=PercentCilia)) + geom_histogram()+ facet_grid(~Group) #run analysis to look for significance aov1<- aov(PercentCilia~CrossType*JarTrt, data=LarvaeCalc) summary(aov1) shapiro.test(aov1$residuals) #does not meet assumption leveneTest(aov1)# meets assumption